1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of loads sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
129static cl::opt<unsigned> TokenFactorInlineLimit(
 130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
133static cl::opt<unsigned> StoreMergeDependenceLimit(
 134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
138static cl::opt<bool> EnableReduceLoadOpStoreWidth(
 139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
143static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
 144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
148static cl::opt<bool> EnableVectorFCopySignExtendRound(
 149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
 160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
174 SmallVector<SDNode *, 64> Worklist;
 175
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
181 DenseMap<SDNode *, unsigned> WorklistMap;
 182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. Since we do not add duplicate nodes
 185 /// to the worklist, this is different from the tail of the worklist.
 186 SmallSetVector<SDNode *, 32> PruningList;
 187
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
200 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
 201
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the work lists because they might get more simplified now.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its user to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
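// A sketch of the worklist protocol implemented by the routines above
// (hypothetical trace, for illustration only):
//
//   AddToWorklist(N);       // N appended; WorklistMap[N] = its index
//   ...
//   removeFromWorklist(N);  // Worklist[index] = nullptr; map entry erased
//   ...
//   getNextWorklistEntry(); // pops from the back, skipping null slots
//
// Nulling a slot instead of erasing it keeps removal O(1) and preserves the
// stable indices that WorklistMap depends on.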
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
336 APInt DemandedBits = APInt::getAllOnes(BitWidth);
 337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
343 ? APInt::getAllOnes(VT.getVectorNumElements())
 344 : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
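// For illustration (hypothetical values): with
//   t0 = or t1, 0xFF00
//   t2 = and t0, 0x00FF
// querying t0 with DemandedBits = 0x00FF shows the OR contributes nothing to
// the demanded bits, so t0's use can be simplified and t2 becomes
// (and t1, 0x00FF).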
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns a nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it so
375 // that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
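// For illustration (hypothetical IR): given
//   (extract_vector_elt (load <4 x i32>, %p), 2)
// the combine can emit a narrow scalar load of just the addressed lane,
//   (load i32, %p + 8)
// provided the legality and alignment checks in the implementation pass.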
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
404 ISD::CondCode CC);
 405
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitATOMIC_STORE(SDNode *N);
534 SDValue visitLIFETIME_END(SDNode *N);
535 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
536 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
537 SDValue visitBUILD_VECTOR(SDNode *N);
538 SDValue visitCONCAT_VECTORS(SDNode *N);
539 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_SHUFFLE(SDNode *N);
541 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
542 SDValue visitINSERT_SUBVECTOR(SDNode *N);
543 SDValue visitMLOAD(SDNode *N);
544 SDValue visitMSTORE(SDNode *N);
545 SDValue visitMGATHER(SDNode *N);
546 SDValue visitMSCATTER(SDNode *N);
547 SDValue visitVPGATHER(SDNode *N);
548 SDValue visitVPSCATTER(SDNode *N);
549 SDValue visitVP_STRIDED_LOAD(SDNode *N);
550 SDValue visitVP_STRIDED_STORE(SDNode *N);
551 SDValue visitFP_TO_FP16(SDNode *N);
552 SDValue visitFP16_TO_FP(SDNode *N);
553 SDValue visitFP_TO_BF16(SDNode *N);
554 SDValue visitBF16_TO_FP(SDNode *N);
555 SDValue visitVECREDUCE(SDNode *N);
556 SDValue visitVPOp(SDNode *N);
557 SDValue visitGET_FPENV_MEM(SDNode *N);
558 SDValue visitSET_FPENV_MEM(SDNode *N);
559
560 template <class MatchContextClass>
561 SDValue visitFADDForFMACombine(SDNode *N);
562 template <class MatchContextClass>
563 SDValue visitFSUBForFMACombine(SDNode *N);
564 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
565
566 SDValue XformToShuffleWithZero(SDNode *N);
567 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
568 const SDLoc &DL,
569 SDNode *N,
570 SDValue N0,
571 SDValue N1);
572 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
575 SDValue N1, SDNodeFlags Flags);
576 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
577 EVT VT, SDValue N0, SDValue N1,
578 SDNodeFlags Flags = SDNodeFlags());
579
580 SDValue visitShiftByConstant(SDNode *N);
581
582 SDValue foldSelectOfConstants(SDNode *N);
583 SDValue foldVSelectOfConstants(SDNode *N);
584 SDValue foldBinOpIntoSelect(SDNode *BO);
585 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
586 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
587 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
588 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
589 SDValue N2, SDValue N3, ISD::CondCode CC,
 590 bool NotExtCompare = false);
591 SDValue convertSelectOfFPConstantsToLoadOffset(
592 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
593 ISD::CondCode CC);
 594 SDValue foldSignChangeInBitcast(SDNode *N);
595 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
596 SDValue N2, SDValue N3, ISD::CondCode CC);
 597 SDValue foldSelectOfBinops(SDNode *N);
598 SDValue foldSextSetcc(SDNode *N);
599 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
600 const SDLoc &DL);
601 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
602 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
603 SDValue unfoldMaskedMerge(SDNode *N);
604 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
605 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
606 const SDLoc &DL, bool foldBooleans);
607 SDValue rebuildSetCC(SDValue N);
608
609 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
610 SDValue &CC, bool MatchStrict = false) const;
611 bool isOneUseSetCC(SDValue N) const;
612
613 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
614 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
615
616 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
617 unsigned HiOp);
618 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
619 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
620 const TargetLowering &TLI);
621
622 SDValue CombineExtLoad(SDNode *N);
623 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
624 SDValue combineRepeatedFPDivisors(SDNode *N);
625 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
626 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
628 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
629 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
630 SDValue BuildSDIV(SDNode *N);
631 SDValue BuildSDIVPow2(SDNode *N);
632 SDValue BuildUDIV(SDNode *N);
633 SDValue BuildSREMPow2(SDNode *N);
634 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
635 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
636 bool KnownNeverZero = false,
637 bool InexpensiveOnly = false,
638 std::optional<EVT> OutVT = std::nullopt);
639 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
640 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
642 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
643 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
646 SDNodeFlags Flags, bool Reciprocal);
647 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
648 bool DemandHighBits = true);
649 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
650 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
651 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
652 unsigned PosOpcode, unsigned NegOpcode,
653 const SDLoc &DL);
654 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
655 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
656 unsigned PosOpcode, unsigned NegOpcode,
657 const SDLoc &DL);
658 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
659 SDValue MatchLoadCombine(SDNode *N);
660 SDValue mergeTruncStores(StoreSDNode *N);
661 SDValue reduceLoadWidth(SDNode *N);
662 SDValue ReduceLoadOpStoreWidth(SDNode *N);
664 SDValue TransformFPLoadStorePair(SDNode *N);
665 SDValue convertBuildVecZextToZext(SDNode *N);
666 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
667 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
668 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
669 SDValue reduceBuildVecToShuffle(SDNode *N);
670 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
671 ArrayRef<int> VectorMask, SDValue VecIn1,
672 SDValue VecIn2, unsigned LeftIdx,
673 bool DidSplitVec);
674 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
675
676 /// Walk up chain skipping non-aliasing memory nodes,
677 /// looking for aliasing nodes and adding them to the Aliases vector.
678 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
679 SmallVectorImpl<SDValue> &Aliases);
680
681 /// Return true if there is any possibility that the two addresses overlap.
682 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
683
684 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
685 /// chain (aliasing node.)
686 SDValue FindBetterChain(SDNode *N, SDValue Chain);
687
688 /// Try to replace a store and any possibly adjacent stores on
689 /// consecutive chains with better chains. Return true only if St is
690 /// replaced.
691 ///
692 /// Notice that other chains may still be replaced even if the function
693 /// returns false.
694 bool findBetterNeighborChains(StoreSDNode *St);
695
696 // Helper for findBetterNeighborChains. Walk up store chain add additional
697 // chained stores that do not overlap and can be parallelized.
698 bool parallelizeChainedStores(StoreSDNode *St);
699
700 /// Holds a pointer to an LSBaseSDNode as well as information on where it
701 /// is located in a sequence of memory operations connected by a chain.
702 struct MemOpLink {
703 // Ptr to the mem node.
704 LSBaseSDNode *MemNode;
705
706 // Offset from the base ptr.
707 int64_t OffsetFromBase;
708
709 MemOpLink(LSBaseSDNode *N, int64_t Offset)
710 : MemNode(N), OffsetFromBase(Offset) {}
711 };
712
713 // Classify the origin of a stored value.
714 enum class StoreSource { Unknown, Constant, Extract, Load };
715 StoreSource getStoreSource(SDValue StoreVal) {
716 switch (StoreVal.getOpcode()) {
717 case ISD::Constant:
718 case ISD::ConstantFP:
719 return StoreSource::Constant;
720 case ISD::BUILD_VECTOR:
 721 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
 722 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
 723 return StoreSource::Constant;
 724 return StoreSource::Unknown;
 725 case ISD::EXTRACT_VECTOR_ELT:
 726 case ISD::EXTRACT_SUBVECTOR:
 727 return StoreSource::Extract;
728 case ISD::LOAD:
729 return StoreSource::Load;
730 default:
731 return StoreSource::Unknown;
732 }
733 }
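// Classification examples (illustrative only):
//   store (Constant 42), %p           --> StoreSource::Constant
//   store (extract_vector_elt %v, 1)  --> StoreSource::Extract
//   store (load %q), %p               --> StoreSource::Load
//   store (add %a, %b), %p            --> StoreSource::Unknown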
734
735 /// This is a helper function for visitMUL to check the profitability
736 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
737 /// MulNode is the original multiply, AddNode is (add x, c1),
738 /// and ConstNode is c2.
739 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
740 SDValue ConstNode);
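// Worked example (hypothetical constants): with c1 = 3 and c2 = 5,
//   (mul (add x, 3), 5) -> (add (mul x, 5), 15)
// The helper weighs whether materializing the folded constant (15) and the
// rewritten multiply is cheaper than keeping the original add/mul pair.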
741
742 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
743 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
744 /// the type of the loaded value to be extended.
745 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
746 EVT LoadResultTy, EVT &ExtVT);
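// For illustration (assuming a little-endian target with a legal i8
// zero-extending load):
//   (and (load i32 %p), 255)
// matches an extload with ExtVT = i8, i.e. (zextload i8 %p to i32), so the
// explicit mask becomes redundant.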
747
748 /// Helper function to calculate whether the given Load/Store can have its
749 /// width reduced to ExtVT.
750 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
751 EVT &MemVT, unsigned ShAmt = 0);
752
753 /// Used by BackwardsPropagateMask to find suitable loads.
754 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
755 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
756 ConstantSDNode *Mask, SDNode *&NodeToMask);
757 /// Attempt to propagate a given AND node back to load leaves so that they
758 /// can be combined into narrow loads.
759 bool BackwardsPropagateMask(SDNode *N);
760
761 /// Helper function for mergeConsecutiveStores which merges the component
762 /// store chains.
763 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
764 unsigned NumStores);
765
766 /// Helper function for mergeConsecutiveStores which checks if all the store
767 /// nodes have the same underlying object. We can still reuse the first
768 /// store's pointer info if all the stores are from the same object.
769 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
770
771 /// This is a helper function for mergeConsecutiveStores. When the source
772 /// elements of the consecutive stores are all constants or all extracted
773 /// vector elements, try to merge them into one larger store introducing
774 /// bitcasts if necessary. \return True if a merged store was created.
775 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
776 EVT MemVT, unsigned NumStores,
777 bool IsConstantSrc, bool UseVector,
778 bool UseTrunc);
779
780 /// This is a helper function for mergeConsecutiveStores. Stores that
781 /// potentially may be merged with St are placed in StoreNodes. RootNode is
782 /// a chain predecessor to all store candidates.
783 void getStoreMergeCandidates(StoreSDNode *St,
784 SmallVectorImpl<MemOpLink> &StoreNodes,
785 SDNode *&Root);
786
787 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
788 /// have indirect dependency through their operands. RootNode is the
789 /// predecessor to all stores calculated by getStoreMergeCandidates and is
790 /// used to prune the dependency check. \return True if safe to merge.
791 bool checkMergeStoreCandidatesForDependencies(
792 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
793 SDNode *RootNode);
794
795 /// This is a helper function for mergeConsecutiveStores. Given a list of
796 /// store candidates, find the first N that are consecutive in memory.
797 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
798 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
799 int64_t ElementSizeBytes) const;
800
801 /// This is a helper function for mergeConsecutiveStores. It is used for
802 /// store chains that are composed entirely of constant values.
803 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
804 unsigned NumConsecutiveStores,
805 EVT MemVT, SDNode *Root, bool AllowVectors);
806
807 /// This is a helper function for mergeConsecutiveStores. It is used for
808 /// store chains that are composed entirely of extracted vector elements.
809 /// When extracting multiple vector elements, try to store them in one
810 /// vector store rather than a sequence of scalar stores.
811 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
812 unsigned NumConsecutiveStores, EVT MemVT,
813 SDNode *Root);
814
815 /// This is a helper function for mergeConsecutiveStores. It is used for
816 /// store chains that are composed entirely of loaded values.
817 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
818 unsigned NumConsecutiveStores, EVT MemVT,
819 SDNode *Root, bool AllowVectors,
820 bool IsNonTemporalStore, bool IsNonTemporalLoad);
821
822 /// Merge consecutive store operations into a wide store.
823 /// This optimization uses wide integers or vectors when possible.
824 /// \return true if stores were merged.
825 bool mergeConsecutiveStores(StoreSDNode *St);
826
827 /// Try to transform a truncation where C is a constant:
828 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
829 ///
830 /// \p N needs to be a truncation and its first operand an AND. Other
831 /// requirements are checked by the function (e.g. that trunc is
832 /// single-use) and if missed an empty SDValue is returned.
833 SDValue distributeTruncateThroughAnd(SDNode *N);
834
835 /// Helper function to determine whether the target supports operation
836 /// given by \p Opcode for type \p VT, that is, whether the operation
837 /// is legal or custom before legalizing operations, and whether is
838 /// legal (but not custom) after legalization.
839 bool hasOperation(unsigned Opcode, EVT VT) {
840 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
841 }
842
843 public:
844 /// Runs the dag combiner on all nodes in the work list
845 void Run(CombineLevel AtLevel);
846
847 SelectionDAG &getDAG() const { return DAG; }
848
849 /// Returns a type large enough to hold any valid shift amount - before type
850 /// legalization these can be huge.
851 EVT getShiftAmountTy(EVT LHSTy) {
852 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
853 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
854 }
855
856 /// This method returns true if we are running before type legalization or
857 /// if the specified VT is legal.
858 bool isTypeLegal(const EVT &VT) {
859 if (!LegalTypes) return true;
860 return TLI.isTypeLegal(VT);
861 }
862
863 /// Convenience wrapper around TargetLowering::getSetCCResultType
864 EVT getSetCCResultType(EVT VT) const {
865 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
866 }
867
868 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
869 SDValue OrigLoad, SDValue ExtLoad,
870 ISD::NodeType ExtType);
871 };
872
873/// This class is a DAGUpdateListener that removes any deleted
874/// nodes from the worklist.
875class WorklistRemover : public SelectionDAG::DAGUpdateListener {
876 DAGCombiner &DC;
877
878public:
879 explicit WorklistRemover(DAGCombiner &dc)
880 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
881
882 void NodeDeleted(SDNode *N, SDNode *E) override {
883 DC.removeFromWorklist(N);
884 }
885};
886
887class WorklistInserter : public SelectionDAG::DAGUpdateListener {
888 DAGCombiner &DC;
889
890public:
891 explicit WorklistInserter(DAGCombiner &dc)
892 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
893
894 // FIXME: Ideally we could add N to the worklist, but this causes exponential
895 // compile time costs in large DAGs, e.g. Halide.
896 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
897};
898
899} // end anonymous namespace
900
901//===----------------------------------------------------------------------===//
902// TargetLowering::DAGCombinerInfo implementation
903//===----------------------------------------------------------------------===//
904
905void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 906 ((DAGCombiner*)DC)->AddToWorklist(N);
 907}
 908
 909SDValue TargetLowering::DAGCombinerInfo::
 910CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 911 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 912}
 913
 914SDValue TargetLowering::DAGCombinerInfo::
 915CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 916 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 917}
 918
 919SDValue TargetLowering::DAGCombinerInfo::
 920CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 921 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 922}
 923
 924bool TargetLowering::DAGCombinerInfo::
 925recursivelyDeleteUnusedNodes(SDNode *N) {
 926 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
 927}
 928
 929void TargetLowering::DAGCombinerInfo::
 930CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 931 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
932}
933
934//===----------------------------------------------------------------------===//
935// Helper Functions
936//===----------------------------------------------------------------------===//
937
938void DAGCombiner::deleteAndRecombine(SDNode *N) {
939 removeFromWorklist(N);
940
941 // If the operands of this node are only used by the node, they will now be
942 // dead. Make sure to re-visit them and recursively delete dead nodes.
943 for (const SDValue &Op : N->ops())
944 // For an operand generating multiple values, one of the values may
945 // become dead allowing further simplification (e.g. split index
946 // arithmetic from an indexed load).
947 if (Op->hasOneUse() || Op->getNumValues() > 1)
948 AddToWorklist(Op.getNode());
949
950 DAG.DeleteNode(N);
951}
952
953// APInts must be the same size for most operations; this helper
954// function zero extends the shorter of the pair so that they match.
955// We provide an Offset so that we can create bitwidths that won't overflow.
956static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
957 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
958 LHS = LHS.zext(Bits);
959 RHS = RHS.zext(Bits);
960}
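// Usage sketch (hypothetical values):
//   APInt A(8, 0xAB), B(16, 0x1234);
//   zeroExtendToMatch(A, B);    // both are now 16 bits wide
//   zeroExtendToMatch(A, B, 1); // Offset = 1 widens both to 17 bits, leaving
//                               // headroom so a following add or shift of the
//                               // pair cannot overflow.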
961
962// Return true if this node is a setcc, or is a select_cc
963// that selects between the target values used for true and false, making it
964// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
965// the appropriate nodes based on the type of node we are checking. This
966// simplifies life a bit for the callers.
967bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
968 SDValue &CC, bool MatchStrict) const {
969 if (N.getOpcode() == ISD::SETCC) {
970 LHS = N.getOperand(0);
971 RHS = N.getOperand(1);
972 CC = N.getOperand(2);
973 return true;
974 }
975
976 if (MatchStrict &&
977 (N.getOpcode() == ISD::STRICT_FSETCC ||
978 N.getOpcode() == ISD::STRICT_FSETCCS)) {
979 LHS = N.getOperand(1);
980 RHS = N.getOperand(2);
981 CC = N.getOperand(3);
982 return true;
983 }
984
985 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
986 !TLI.isConstFalseVal(N.getOperand(3)))
987 return false;
988
989 if (TLI.getBooleanContents(N.getValueType()) ==
990 TargetLowering::UndefinedBooleanContent)
 991 return false;
992
993 LHS = N.getOperand(0);
994 RHS = N.getOperand(1);
995 CC = N.getOperand(4);
996 return true;
997}
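// For illustration (hypothetical operands): a select_cc whose selected values
// are the target's canonical true/false constants is treated like a setcc:
//   (select_cc %a, %b, TrueVal, FalseVal, setlt)  ~  (setcc %a, %b, setlt)
// in which case LHS, RHS and CC are taken from operands 0, 1 and 4.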
998
999/// Return true if this is a SetCC-equivalent operation with only one use.
1000/// If this is true, it allows the users to invert the operation for free when
1001/// it is profitable to do so.
1002bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1003 SDValue N0, N1, N2;
1004 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1005 return true;
1006 return false;
1007}
1008
1009static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
 1010 if (!ScalarTy.isSimple())
1011 return false;
1012
1013 uint64_t MaskForTy = 0ULL;
1014 switch (ScalarTy.getSimpleVT().SimpleTy) {
1015 case MVT::i8:
1016 MaskForTy = 0xFFULL;
1017 break;
1018 case MVT::i16:
1019 MaskForTy = 0xFFFFULL;
1020 break;
1021 case MVT::i32:
1022 MaskForTy = 0xFFFFFFFFULL;
1023 break;
1024 default:
1025 return false;
1026 break;
1027 }
1028
1029 APInt Val;
1030 if (ISD::isConstantSplatVector(N, Val))
1031 return Val.getLimitedValue() == MaskForTy;
1032
1033 return false;
1034}
1035
1036// Determines if it is a constant integer or a splat/build vector of constant
1037// integers (and undefs).
1038// Do not permit build vector implicit truncation.
1039static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1040 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1041 return !(Const->isOpaque() && NoOpaques);
1042 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1043 return false;
1044 unsigned BitWidth = N.getScalarValueSizeInBits();
1045 for (const SDValue &Op : N->op_values()) {
1046 if (Op.isUndef())
1047 continue;
1048 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1049 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1050 (Const->isOpaque() && NoOpaques))
1051 return false;
1052 }
1053 return true;
1054}
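// Examples (illustrative only):
//   (Constant 7)                        -> true
//   (build_vector 1, 2, undef, 4)       -> true
//   (splat_vector 3)                    -> true
//   (build_vector 1, (add x, 1), 2, 3)  -> false (non-constant element)
// With NoOpaques = true, opaque target constants are additionally rejected.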
1055
1056// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1057// undef's.
1058static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1059 if (V.getOpcode() != ISD::BUILD_VECTOR)
1060 return false;
1061 return isConstantOrConstantVector(V, NoOpaques) ||
1062 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 1063}
1064
1065// Determine if this is an indexed load with an opaque target constant index.
1066static bool canSplitIdx(LoadSDNode *LD) {
1067 return MaySplitLoadIndex &&
1068 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1069 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1070}
1071
1072bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1073 const SDLoc &DL,
1074 SDNode *N,
1075 SDValue N0,
1076 SDValue N1) {
1077 // Currently this only tries to ensure we don't undo the GEP splits done by
1078 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1079 // we check if the following transformation would be problematic:
1080 // (load/store (add, (add, x, offset1), offset2)) ->
1081 // (load/store (add, x, offset1+offset2)).
1082
1083 // (load/store (add, (add, x, y), offset2)) ->
1084 // (load/store (add, (add, x, offset2), y)).
1085
1086 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1087 return false;
1088
1089 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1090 if (!C2)
1091 return false;
1092
1093 const APInt &C2APIntVal = C2->getAPIntValue();
1094 if (C2APIntVal.getSignificantBits() > 64)
1095 return false;
1096
1097 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1098 if (N0.hasOneUse())
1099 return false;
1100
1101 const APInt &C1APIntVal = C1->getAPIntValue();
1102 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1103 if (CombinedValueIntVal.getSignificantBits() > 64)
1104 return false;
1105 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1106
1107 for (SDNode *Node : N->uses()) {
1108 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1109 // Is x[offset2] already not a legal addressing mode? If so then
1110 // reassociating the constants breaks nothing (we test offset2 because
1111 // that's the one we hope to fold into the load or store).
1112 TargetLoweringBase::AddrMode AM;
 1113 AM.HasBaseReg = true;
1114 AM.BaseOffs = C2APIntVal.getSExtValue();
1115 EVT VT = LoadStore->getMemoryVT();
1116 unsigned AS = LoadStore->getAddressSpace();
1117 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1118 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1119 continue;
1120
1121 // Would x[offset1+offset2] still be a legal addressing mode?
1122 AM.BaseOffs = CombinedValue;
1123 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1124 return true;
1125 }
1126 }
1127 } else {
1128 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1129 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1130 return false;
1131
1132 for (SDNode *Node : N->uses()) {
1133 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1134 if (!LoadStore)
1135 return false;
1136
1137 // Is x[offset2] a legal addressing mode? If so then
1138 // reassociating the constants breaks the address pattern.
 1139 TargetLoweringBase::AddrMode AM;
 1140 AM.HasBaseReg = true;
1141 AM.BaseOffs = C2APIntVal.getSExtValue();
1142 EVT VT = LoadStore->getMemoryVT();
1143 unsigned AS = LoadStore->getAddressSpace();
1144 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1145 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1146 return false;
1147 }
1148 return true;
1149 }
1150
1151 return false;
1152}
1153
1154/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1155/// \p N0 is the same kind of operation as \p Opc.
1156SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1157 SDValue N0, SDValue N1,
1158 SDNodeFlags Flags) {
1159 EVT VT = N0.getValueType();
1160
1161 if (N0.getOpcode() != Opc)
1162 return SDValue();
1163
1164 SDValue N00 = N0.getOperand(0);
1165 SDValue N01 = N0.getOperand(1);
1166
1167 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
 1168 SDNodeFlags NewFlags;
1169 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1170 Flags.hasNoUnsignedWrap())
1171 NewFlags.setNoUnsignedWrap(true);
1172
1173 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
 1174 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1175 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1176 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1177 return SDValue();
1178 }
1179 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1180 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1181 // iff (op x, c1) has one use
1182 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1183 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1184 }
1185 }
1186
1187 // Check for repeated operand logic simplifications.
1188 if (Opc == ISD::AND || Opc == ISD::OR) {
1189 // (N00 & N01) & N00 --> N00 & N01
1190 // (N00 & N01) & N01 --> N00 & N01
1191 // (N00 | N01) | N00 --> N00 | N01
1192 // (N00 | N01) | N01 --> N00 | N01
1193 if (N1 == N00 || N1 == N01)
1194 return N0;
1195 }
1196 if (Opc == ISD::XOR) {
1197 // (N00 ^ N01) ^ N00 --> N01
1198 if (N1 == N00)
1199 return N01;
1200 // (N00 ^ N01) ^ N01 --> N00
1201 if (N1 == N01)
1202 return N00;
1203 }
1204
1205 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1206 if (N1 != N01) {
1207 // Reassociate if (op N00, N1) already exists
 1208 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
 1209 // if Op (Op N00, N1), N01 already exists,
 1210 // we need to stop reassociating to avoid an infinite loop
1211 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1212 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1213 }
1214 }
1215
1216 if (N1 != N00) {
1217 // Reassociate if (op N01, N1) already exists
 1218 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
 1219 // if Op (Op N01, N1), N00 already exists,
 1220 // we need to stop reassociating to avoid an infinite loop
1221 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1222 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1223 }
1224 }
1225
1226 // Reassociate the operands from (OR/AND (OR/AND(N00, N001)), N1) to (OR/AND
1227 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1228 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1229 // comparisons with the same predicate. This enables optimizations as the
1230 // following one:
1231 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1232 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1233 if (Opc == ISD::AND || Opc == ISD::OR) {
1234 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1235 N01->getOpcode() == ISD::SETCC) {
1236 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1237 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1238 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1239 if (CC1 == CC00 && CC1 != CC01) {
1240 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1241 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1242 }
1243 if (CC1 == CC01 && CC1 != CC00) {
1244 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1245 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1246 }
1247 }
1248 }
1249 }
1250
1251 return SDValue();
1252}
1253
1254/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1255/// same kind of operation as \p Opc.
1256SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1257 SDValue N1, SDNodeFlags Flags) {
1258 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1259
1260 // Floating-point reassociation is not allowed without loose FP math.
1261 if (N0.getValueType().isFloatingPoint() ||
1262 N1.getValueType().isFloatingPoint())
 1263 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1264 return SDValue();
1265
1266 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1267 return Combined;
1268 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1269 return Combined;
1270 return SDValue();
1271}
1272
1273// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1274// Note that we only expect Flags to be passed from FP operations. For integer
1275// operations they need to be dropped.
1276SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1277 const SDLoc &DL, EVT VT, SDValue N0,
1278 SDValue N1, SDNodeFlags Flags) {
1279 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1280 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1281 N0->hasOneUse() && N1->hasOneUse() &&
1283 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1284 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1285 return DAG.getNode(RedOpc, DL, VT,
1286 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1287 N0.getOperand(0), N1.getOperand(0)));
1288 }
1289 return SDValue();
1290}
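// A concrete instance of the fold above (illustrative): with
// RedOpc = ISD::VECREDUCE_ADD and Opc = ISD::ADD,
//   (add (vecreduce_add x), (vecreduce_add y)) -> (vecreduce_add (add x, y))
// trading two reductions for one when the target reports the reassociation
// as profitable.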
1291
1292SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1293 bool AddTo) {
1294 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1295 ++NodesCombined;
1296 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1297 To[0].dump(&DAG);
1298 dbgs() << " and " << NumTo - 1 << " other values\n");
1299 for (unsigned i = 0, e = NumTo; i != e; ++i)
1300 assert((!To[i].getNode() ||
1301 N->getValueType(i) == To[i].getValueType()) &&
1302 "Cannot combine value to value of different type!");
1303
1304 WorklistRemover DeadNodes(*this);
1305 DAG.ReplaceAllUsesWith(N, To);
1306 if (AddTo) {
1307 // Push the new nodes and any users onto the worklist
1308 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1309 if (To[i].getNode())
1310 AddToWorklistWithUsers(To[i].getNode());
1311 }
1312 }
1313
1314 // Finally, if the node is now dead, remove it from the graph. The node
1315 // may not be dead if the replacement process recursively simplified to
1316 // something else needing this node.
1317 if (N->use_empty())
1318 deleteAndRecombine(N);
1319 return SDValue(N, 0);
1320}
1321
1322void DAGCombiner::
1323CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1324 // Replace the old value with the new one.
1325 ++NodesCombined;
1326 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1327 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1328
1329 // Replace all uses.
1330 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1331
1332 // Push the new node and any (possibly new) users onto the worklist.
1333 AddToWorklistWithUsers(TLO.New.getNode());
1334
1335 // Finally, if the node is now dead, remove it from the graph.
1336 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1337}
1338
1339/// Check the specified integer node value to see if it can be simplified or if
1340/// things it uses can be simplified by bit propagation. If so, return true.
1341bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1342 const APInt &DemandedElts,
1343 bool AssumeSingleUse) {
1344 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1345 KnownBits Known;
1346 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1347 AssumeSingleUse))
1348 return false;
1349
1350 // Revisit the node.
1351 AddToWorklist(Op.getNode());
1352
1353 CommitTargetLoweringOpt(TLO);
1354 return true;
1355}
1356
1357/// Check the specified vector node value to see if it can be simplified or
1358/// if things it uses can be simplified as it only uses some of the elements.
1359/// If so, return true.
1360bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1361 const APInt &DemandedElts,
1362 bool AssumeSingleUse) {
1363 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1364 APInt KnownUndef, KnownZero;
1365 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1366 TLO, 0, AssumeSingleUse))
1367 return false;
1368
1369 // Revisit the node.
1370 AddToWorklist(Op.getNode());
1371
1372 CommitTargetLoweringOpt(TLO);
1373 return true;
1374}
1375
1376void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1377 SDLoc DL(Load);
1378 EVT VT = Load->getValueType(0);
1379 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1380
1381 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1382 Trunc.dump(&DAG); dbgs() << '\n');
1383
1384 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1385 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1386
1387 AddToWorklist(Trunc.getNode());
1388 recursivelyDeleteUnusedNodes(Load);
1389}
1390
1391SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1392 Replace = false;
1393 SDLoc DL(Op);
1394 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1395 LoadSDNode *LD = cast<LoadSDNode>(Op);
1396 EVT MemVT = LD->getMemoryVT();
1397 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
 1398 : LD->getExtensionType();
1399 Replace = true;
1400 return DAG.getExtLoad(ExtType, DL, PVT,
1401 LD->getChain(), LD->getBasePtr(),
1402 MemVT, LD->getMemOperand());
1403 }
1404
1405 unsigned Opc = Op.getOpcode();
1406 switch (Opc) {
1407 default: break;
1408 case ISD::AssertSext:
1409 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1410 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1411 break;
1412 case ISD::AssertZext:
1413 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1414 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1415 break;
1416 case ISD::Constant: {
1417 unsigned ExtOpc =
1418 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1419 return DAG.getNode(ExtOpc, DL, PVT, Op);
1420 }
1421 }
1422
1423 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1424 return SDValue();
1425 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1426}
1427
1428SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1429 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
 1430 return SDValue();
1431 EVT OldVT = Op.getValueType();
1432 SDLoc DL(Op);
1433 bool Replace = false;
1434 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1435 if (!NewOp.getNode())
1436 return SDValue();
1437 AddToWorklist(NewOp.getNode());
1438
1439 if (Replace)
1440 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1441 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1442 DAG.getValueType(OldVT));
1443}
1444
1445SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1446 EVT OldVT = Op.getValueType();
1447 SDLoc DL(Op);
1448 bool Replace = false;
1449 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1450 if (!NewOp.getNode())
1451 return SDValue();
1452 AddToWorklist(NewOp.getNode());
1453
1454 if (Replace)
1455 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1456 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1457}
1458
1459/// Promote the specified integer binary operation if the target indicates it is
1460/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1461/// i32 since i16 instructions are longer.
1462SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1463 if (!LegalOperations)
1464 return SDValue();
1465
1466 EVT VT = Op.getValueType();
1467 if (VT.isVector() || !VT.isInteger())
1468 return SDValue();
1469
1470 // If operation type is 'undesirable', e.g. i16 on x86, consider
1471 // promoting it.
1472 unsigned Opc = Op.getOpcode();
1473 if (TLI.isTypeDesirableForOp(Opc, VT))
1474 return SDValue();
1475
1476 EVT PVT = VT;
1477 // Consult target whether it is a good idea to promote this operation and
1478 // what's the right type to promote it to.
1479 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1480 assert(PVT != VT && "Don't know what type to promote to!");
1481
1482 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1483
1484 bool Replace0 = false;
1485 SDValue N0 = Op.getOperand(0);
1486 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1487
1488 bool Replace1 = false;
1489 SDValue N1 = Op.getOperand(1);
1490 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1491 SDLoc DL(Op);
1492
1493 SDValue RV =
1494 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1495
1496 // We are always replacing N0/N1's use in N and only need additional
1497 // replacements if there are additional uses.
1498 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1499 // (SDValue) here because the node may reference multiple values
1500 // (for example, the chain value of a load node).
1501 Replace0 &= !N0->hasOneUse();
1502 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1503
1504 // Combine Op here so it is preserved past replacements.
1505 CombineTo(Op.getNode(), RV);
1506
1507 // If operands have a use ordering, make sure we deal with
1508 // predecessor first.
1509 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1510 std::swap(N0, N1);
1511 std::swap(NN0, NN1);
1512 }
1513
1514 if (Replace0) {
1515 AddToWorklist(NN0.getNode());
1516 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1517 }
1518 if (Replace1) {
1519 AddToWorklist(NN1.getNode());
1520 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1521 }
1522 return Op;
1523 }
1524 return SDValue();
1525}
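// For illustration (assuming a target where i16 is undesirable and PVT = i32,
// as on x86 where 16-bit operations need an extra prefix byte):
//   (add i16 x, y)
// is rewritten to
//   (trunc (add i32 (anyext x), (anyext y)))
// with any loads among the operands turned into extending loads instead.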
1526
1527/// Promote the specified integer shift operation if the target indicates it is
1528/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1529/// i32 since i16 instructions are longer.
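/// For right shifts the shifted value is widened with the matching extension,
/// e.g. (srl i16 x, c) becomes (trunc i16 (srl i32 (zext x), c)), so the bits
/// shifted in from above the original width are still zero.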
1530SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1531 if (!LegalOperations)
1532 return SDValue();
1533
1534 EVT VT = Op.getValueType();
1535 if (VT.isVector() || !VT.isInteger())
1536 return SDValue();
1537
1538 // If operation type is 'undesirable', e.g. i16 on x86, consider
1539 // promoting it.
1540 unsigned Opc = Op.getOpcode();
1541 if (TLI.isTypeDesirableForOp(Opc, VT))
1542 return SDValue();
1543
1544 EVT PVT = VT;
1545 // Consult target whether it is a good idea to promote this operation and
1546 // what's the right type to promote it to.
1547 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1548 assert(PVT != VT && "Don't know what type to promote to!");
1549
1550 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1551
1552 bool Replace = false;
1553 SDValue N0 = Op.getOperand(0);
1554 if (Opc == ISD::SRA)
1555 N0 = SExtPromoteOperand(N0, PVT);
1556 else if (Opc == ISD::SRL)
1557 N0 = ZExtPromoteOperand(N0, PVT);
1558 else
1559 N0 = PromoteOperand(N0, PVT, Replace);
1560
1561 if (!N0.getNode())
1562 return SDValue();
1563
1564 SDLoc DL(Op);
1565 SDValue N1 = Op.getOperand(1);
1566 SDValue RV =
1567 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1568
1569 if (Replace)
1570 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1571
1572 // Deal with Op being deleted.
1573 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1574 return RV;
1575 }
1576 return SDValue();
1577}
1578
1579SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1580 if (!LegalOperations)
1581 return SDValue();
1582
1583 EVT VT = Op.getValueType();
1584 if (VT.isVector() || !VT.isInteger())
1585 return SDValue();
1586
1587 // If operation type is 'undesirable', e.g. i16 on x86, consider
1588 // promoting it.
1589 unsigned Opc = Op.getOpcode();
1590 if (TLI.isTypeDesirableForOp(Opc, VT))
1591 return SDValue();
1592
1593 EVT PVT = VT;
1594 // Consult target whether it is a good idea to promote this operation and
1595 // what's the right type to promote it to.
1596 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1597 assert(PVT != VT && "Don't know what type to promote to!");
1598 // fold (aext (aext x)) -> (aext x)
1599 // fold (aext (zext x)) -> (zext x)
1600 // fold (aext (sext x)) -> (sext x)
1601 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1602 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1603 }
1604 return SDValue();
1605}
1606
1607bool DAGCombiner::PromoteLoad(SDValue Op) {
1608 if (!LegalOperations)
1609 return false;
1610
1611 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1612 return false;
1613
1614 EVT VT = Op.getValueType();
1615 if (VT.isVector() || !VT.isInteger())
1616 return false;
1617
1618 // If operation type is 'undesirable', e.g. i16 on x86, consider
1619 // promoting it.
1620 unsigned Opc = Op.getOpcode();
1621 if (TLI.isTypeDesirableForOp(Opc, VT))
1622 return false;
1623
1624 EVT PVT = VT;
1625 // Consult target whether it is a good idea to promote this operation and
1626 // what's the right type to promote it to.
1627 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1628 assert(PVT != VT && "Don't know what type to promote to!");
1629
1630 SDLoc DL(Op);
1631 SDNode *N = Op.getNode();
1632 LoadSDNode *LD = cast<LoadSDNode>(N);
1633 EVT MemVT = LD->getMemoryVT();
1634 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1635 : LD->getExtensionType();
1636 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1637 LD->getChain(), LD->getBasePtr(),
1638 MemVT, LD->getMemOperand());
1639 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1640
1641 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1642 Result.dump(&DAG); dbgs() << '\n');
1643
1644 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1645 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1646
1647 AddToWorklist(Result.getNode());
1648 recursivelyDeleteUnusedNodes(N);
1649 return true;
1650 }
1651
1652 return false;
1653}
1654
1655/// Recursively delete a node which has no uses and any operands for
1656/// which it is the only use.
1657///
1658/// Note that this both deletes the nodes and removes them from the worklist.
1659/// It also adds any nodes that have had a user deleted to the worklist, as
1660/// they may now have only one use and be subject to other combines.
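/// For example, when a dead TRUNCATE of a LOAD is deleted, the LOAD is deleted
/// too if nothing else uses its value or chain; otherwise the LOAD is simply
/// put back on the worklist for further combining.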
1661bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1662 if (!N->use_empty())
1663 return false;
1664
1665 SmallSetVector<SDNode *, 16> Nodes;
1666 Nodes.insert(N);
1667 do {
1668 N = Nodes.pop_back_val();
1669 if (!N)
1670 continue;
1671
1672 if (N->use_empty()) {
1673 for (const SDValue &ChildN : N->op_values())
1674 Nodes.insert(ChildN.getNode());
1675
1676 removeFromWorklist(N);
1677 DAG.DeleteNode(N);
1678 } else {
1679 AddToWorklist(N);
1680 }
1681 } while (!Nodes.empty());
1682 return true;
1683}
1684
1685//===----------------------------------------------------------------------===//
1686// Main DAG Combiner implementation
1687//===----------------------------------------------------------------------===//
1688
1689void DAGCombiner::Run(CombineLevel AtLevel) {
1690 // set the instance variables, so that the various visit routines may use it.
1691 Level = AtLevel;
1692 LegalDAG = Level >= AfterLegalizeDAG;
1693 LegalOperations = Level >= AfterLegalizeVectorOps;
1694 LegalTypes = Level >= AfterLegalizeTypes;
1695
1696 WorklistInserter AddNodes(*this);
1697
1698 // Add all the dag nodes to the worklist.
1699 //
1700 // Note: Not all nodes are added to the PruningList here, because the only
1701 // nodes which can be deleted are those which have no uses, and all other
1702 // nodes which would otherwise be added to the worklist by the first call to
1703 // getNextWorklistEntry are already present in it.
1704 for (SDNode &Node : DAG.allnodes())
1705 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1706
1707 // Create a dummy node (which is not added to allnodes) that adds a reference
1708 // to the root node, preventing it from being deleted, and tracking any
1709 // changes of the root.
1710 HandleSDNode Dummy(DAG.getRoot());
1711
1712 // While we have a valid worklist entry node, try to combine it.
1713 while (SDNode *N = getNextWorklistEntry()) {
1714 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1715 // N is deleted from the DAG, since they too may now be dead or may have a
1716 // reduced number of uses, allowing other xforms.
1717 if (recursivelyDeleteUnusedNodes(N))
1718 continue;
1719
1720 WorklistRemover DeadNodes(*this);
1721
1722 // If this combine is running after legalizing the DAG, re-legalize any
1723 // nodes pulled off the worklist.
1724 if (LegalDAG) {
1725 SmallSetVector<SDNode *, 16> UpdatedNodes;
1726 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1727
1728 for (SDNode *LN : UpdatedNodes)
1729 AddToWorklistWithUsers(LN);
1730
1731 if (!NIsValid)
1732 continue;
1733 }
1734
1735 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1736
1737 // Add any operands of the new node which have not yet been combined to the
1738 // worklist as well. Because the worklist uniques things already, this
1739 // won't repeatedly process the same operand.
1740 for (const SDValue &ChildN : N->op_values())
1741 if (!CombinedNodes.count(ChildN.getNode()))
1742 AddToWorklist(ChildN.getNode());
1743
1744 CombinedNodes.insert(N);
1745 SDValue RV = combine(N);
1746
1747 if (!RV.getNode())
1748 continue;
1749
1750 ++NodesCombined;
1751
1752 // If we get back the same node we passed in, rather than a new node or
1753 // zero, we know that the node must have defined multiple values and
1754 // CombineTo was used. Since CombineTo takes care of the worklist
1755 // mechanics for us, we have no work to do in this case.
1756 if (RV.getNode() == N)
1757 continue;
1758
1759 assert(N->getOpcode() != ISD::DELETED_NODE &&
1760 RV.getOpcode() != ISD::DELETED_NODE &&
1761 "Node was deleted but visit returned new node!");
1762
1763 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1764
1765 if (N->getNumValues() == RV->getNumValues())
1766 DAG.ReplaceAllUsesWith(N, RV.getNode());
1767 else {
1768 assert(N->getValueType(0) == RV.getValueType() &&
1769 N->getNumValues() == 1 && "Type mismatch");
1770 DAG.ReplaceAllUsesWith(N, &RV);
1771 }
1772
1773 // Push the new node and any users onto the worklist. Omit this if the
1774 // new node is the EntryToken (e.g. if a store managed to get optimized
1775 // out), because re-visiting the EntryToken and its users will not uncover
1776 // any additional opportunities, but there may be a large number of such
1777 // users, potentially causing compile time explosion.
1778 if (RV.getOpcode() != ISD::EntryToken)
1779 AddToWorklistWithUsers(RV.getNode());
1780
1781 // Finally, if the node is now dead, remove it from the graph. The node
1782 // may not be dead if the replacement process recursively simplified to
1783 // something else needing this node. This will also take care of adding any
1784 // operands which have lost a user to the worklist.
1785 recursivelyDeleteUnusedNodes(N);
1786 }
1787
1788 // If the root changed (e.g. it was a dead load), update the root.
1789 DAG.setRoot(Dummy.getValue());
1790 DAG.RemoveDeadNodes();
1791}
1792
1793SDValue DAGCombiner::visit(SDNode *N) {
1794 // clang-format off
1795 switch (N->getOpcode()) {
1796 default: break;
1797 case ISD::TokenFactor: return visitTokenFactor(N);
1798 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1799 case ISD::ADD: return visitADD(N);
1800 case ISD::SUB: return visitSUB(N);
1801 case ISD::SADDSAT:
1802 case ISD::UADDSAT: return visitADDSAT(N);
1803 case ISD::SSUBSAT:
1804 case ISD::USUBSAT: return visitSUBSAT(N);
1805 case ISD::ADDC: return visitADDC(N);
1806 case ISD::SADDO:
1807 case ISD::UADDO: return visitADDO(N);
1808 case ISD::SUBC: return visitSUBC(N);
1809 case ISD::SSUBO:
1810 case ISD::USUBO: return visitSUBO(N);
1811 case ISD::ADDE: return visitADDE(N);
1812 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1813 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1814 case ISD::SUBE: return visitSUBE(N);
1815 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1816 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1817 case ISD::SMULFIX:
1818 case ISD::SMULFIXSAT:
1819 case ISD::UMULFIX:
1820 case ISD::UMULFIXSAT: return visitMULFIX(N);
1821 case ISD::MUL: return visitMUL(N);
1822 case ISD::SDIV: return visitSDIV(N);
1823 case ISD::UDIV: return visitUDIV(N);
1824 case ISD::SREM:
1825 case ISD::UREM: return visitREM(N);
1826 case ISD::MULHU: return visitMULHU(N);
1827 case ISD::MULHS: return visitMULHS(N);
1828 case ISD::AVGFLOORS:
1829 case ISD::AVGFLOORU:
1830 case ISD::AVGCEILS:
1831 case ISD::AVGCEILU: return visitAVG(N);
1832 case ISD::ABDS:
1833 case ISD::ABDU: return visitABD(N);
1834 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1835 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1836 case ISD::SMULO:
1837 case ISD::UMULO: return visitMULO(N);
1838 case ISD::SMIN:
1839 case ISD::SMAX:
1840 case ISD::UMIN:
1841 case ISD::UMAX: return visitIMINMAX(N);
1842 case ISD::AND: return visitAND(N);
1843 case ISD::OR: return visitOR(N);
1844 case ISD::XOR: return visitXOR(N);
1845 case ISD::SHL: return visitSHL(N);
1846 case ISD::SRA: return visitSRA(N);
1847 case ISD::SRL: return visitSRL(N);
1848 case ISD::ROTR:
1849 case ISD::ROTL: return visitRotate(N);
1850 case ISD::FSHL:
1851 case ISD::FSHR: return visitFunnelShift(N);
1852 case ISD::SSHLSAT:
1853 case ISD::USHLSAT: return visitSHLSAT(N);
1854 case ISD::ABS: return visitABS(N);
1855 case ISD::BSWAP: return visitBSWAP(N);
1856 case ISD::BITREVERSE: return visitBITREVERSE(N);
1857 case ISD::CTLZ: return visitCTLZ(N);
1858 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1859 case ISD::CTTZ: return visitCTTZ(N);
1860 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1861 case ISD::CTPOP: return visitCTPOP(N);
1862 case ISD::SELECT: return visitSELECT(N);
1863 case ISD::VSELECT: return visitVSELECT(N);
1864 case ISD::SELECT_CC: return visitSELECT_CC(N);
1865 case ISD::SETCC: return visitSETCC(N);
1866 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1867 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1868 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1869 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1870 case ISD::AssertSext:
1871 case ISD::AssertZext: return visitAssertExt(N);
1872 case ISD::AssertAlign: return visitAssertAlign(N);
1873 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1876 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1877 case ISD::TRUNCATE: return visitTRUNCATE(N);
1878 case ISD::BITCAST: return visitBITCAST(N);
1879 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1880 case ISD::FADD: return visitFADD(N);
1881 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1882 case ISD::FSUB: return visitFSUB(N);
1883 case ISD::FMUL: return visitFMUL(N);
1884 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1885 case ISD::FMAD: return visitFMAD(N);
1886 case ISD::FDIV: return visitFDIV(N);
1887 case ISD::FREM: return visitFREM(N);
1888 case ISD::FSQRT: return visitFSQRT(N);
1889 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1890 case ISD::FPOW: return visitFPOW(N);
1891 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1892 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1893 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1894 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1895 case ISD::LRINT:
1896 case ISD::LLRINT: return visitXRINT(N);
1897 case ISD::FP_ROUND: return visitFP_ROUND(N);
1898 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1899 case ISD::FNEG: return visitFNEG(N);
1900 case ISD::FABS: return visitFABS(N);
1901 case ISD::FFLOOR: return visitFFLOOR(N);
1902 case ISD::FMINNUM:
1903 case ISD::FMAXNUM:
1904 case ISD::FMINIMUM:
1905 case ISD::FMAXIMUM: return visitFMinMax(N);
1906 case ISD::FCEIL: return visitFCEIL(N);
1907 case ISD::FTRUNC: return visitFTRUNC(N);
1908 case ISD::FFREXP: return visitFFREXP(N);
1909 case ISD::BRCOND: return visitBRCOND(N);
1910 case ISD::BR_CC: return visitBR_CC(N);
1911 case ISD::LOAD: return visitLOAD(N);
1912 case ISD::STORE: return visitSTORE(N);
1913 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1914 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1915 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1916 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1917 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1918 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1919 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1920 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1921 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1922 case ISD::MGATHER: return visitMGATHER(N);
1923 case ISD::MLOAD: return visitMLOAD(N);
1924 case ISD::MSCATTER: return visitMSCATTER(N);
1925 case ISD::MSTORE: return visitMSTORE(N);
1926 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1927 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1928 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1929 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1930 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1931 case ISD::FREEZE: return visitFREEZE(N);
1932 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1933 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1936 case ISD::VECREDUCE_ADD:
1937 case ISD::VECREDUCE_MUL:
1938 case ISD::VECREDUCE_AND:
1939 case ISD::VECREDUCE_OR:
1940 case ISD::VECREDUCE_XOR:
1948 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1949#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1950#include "llvm/IR/VPIntrinsics.def"
1951 return visitVPOp(N);
1952 }
1953 // clang-format on
1954 return SDValue();
1955}
1956
1957SDValue DAGCombiner::combine(SDNode *N) {
1958 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1959 return SDValue();
1960
1961 SDValue RV;
1962 if (!DisableGenericCombines)
1963 RV = visit(N);
1964
1965 // If nothing happened, try a target-specific DAG combine.
1966 if (!RV.getNode()) {
1967 assert(N->getOpcode() != ISD::DELETED_NODE &&
1968 "Node was deleted but visit returned NULL!");
1969
1970 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1971 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1972
1973 // Expose the DAG combiner to the target combiner impls.
1974 TargetLowering::DAGCombinerInfo
1975 DagCombineInfo(DAG, Level, false, this);
1976
1977 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1978 }
1979 }
1980
1981 // If nothing happened still, try promoting the operation.
1982 if (!RV.getNode()) {
1983 switch (N->getOpcode()) {
1984 default: break;
1985 case ISD::ADD:
1986 case ISD::SUB:
1987 case ISD::MUL:
1988 case ISD::AND:
1989 case ISD::OR:
1990 case ISD::XOR:
1991 RV = PromoteIntBinOp(SDValue(N, 0));
1992 break;
1993 case ISD::SHL:
1994 case ISD::SRA:
1995 case ISD::SRL:
1996 RV = PromoteIntShiftOp(SDValue(N, 0));
1997 break;
1998 case ISD::SIGN_EXTEND:
1999 case ISD::ZERO_EXTEND:
2000 case ISD::ANY_EXTEND:
2001 RV = PromoteExtend(SDValue(N, 0));
2002 break;
2003 case ISD::LOAD:
2004 if (PromoteLoad(SDValue(N, 0)))
2005 RV = SDValue(N, 0);
2006 break;
2007 }
2008 }
2009
2010 // If N is a commutative binary node, try to eliminate it if the commuted
2011 // version is already present in the DAG.
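  // For example, if both (add x, y) and (add y, x) exist in the DAG, the
  // second can be replaced by the first so the duplicate disappears.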
2012 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2013 SDValue N0 = N->getOperand(0);
2014 SDValue N1 = N->getOperand(1);
2015
2016 // Constant operands are canonicalized to RHS.
2017 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2018 SDValue Ops[] = {N1, N0};
2019 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2020 N->getFlags());
2021 if (CSENode)
2022 return SDValue(CSENode, 0);
2023 }
2024 }
2025
2026 return RV;
2027}
2028
2029/// Given a node, return its input chain if it has one, otherwise return a null
2030/// sd operand.
2031static SDValue getInputChainForNode(SDNode *N) {
2032 if (unsigned NumOps = N->getNumOperands()) {
2033 if (N->getOperand(0).getValueType() == MVT::Other)
2034 return N->getOperand(0);
2035 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2036 return N->getOperand(NumOps-1);
2037 for (unsigned i = 1; i < NumOps-1; ++i)
2038 if (N->getOperand(i).getValueType() == MVT::Other)
2039 return N->getOperand(i);
2040 }
2041 return SDValue();
2042}
2043
2044SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2045 // If N has two operands, where one has an input chain equal to the other,
2046 // the 'other' chain is redundant.
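  // For example, TokenFactor(St, Ch) where store St already has Ch as its
  // input chain carries no extra ordering, so it can be replaced by St alone.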
2047 if (N->getNumOperands() == 2) {
2048 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2049 return N->getOperand(0);
2050 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2051 return N->getOperand(1);
2052 }
2053
2054 // Don't simplify token factors if optnone.
2055 if (OptLevel == CodeGenOptLevel::None)
2056 return SDValue();
2057
2058 // Don't simplify the token factor if the node itself has too many operands.
2059 if (N->getNumOperands() > TokenFactorInlineLimit)
2060 return SDValue();
2061
2062 // If the sole user is a token factor, we should make sure we have a
2063 // chance to merge them together. This prevents TF chains from inhibiting
2064 // optimizations.
2065 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2066 AddToWorklist(*(N->use_begin()));
2067
2068 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2069 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2070 SmallPtrSet<SDNode *, 16> SeenOps;
2071 bool Changed = false; // If we should replace this token factor.
2072
2073 // Start out with this token factor.
2074 TFs.push_back(N);
2075
2076 // Iterate through token factors. TFs grows when new token factors are
2077 // encountered.
2078 for (unsigned i = 0; i < TFs.size(); ++i) {
2079 // Limit number of nodes to inline, to avoid quadratic compile times.
2080 // We have to add the outstanding Token Factors to Ops, otherwise we might
2081 // drop Ops from the resulting Token Factors.
2082 if (Ops.size() > TokenFactorInlineLimit) {
2083 for (unsigned j = i; j < TFs.size(); j++)
2084 Ops.emplace_back(TFs[j], 0);
2085 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2086 // combiner worklist later.
2087 TFs.resize(i);
2088 break;
2089 }
2090
2091 SDNode *TF = TFs[i];
2092 // Check each of the operands.
2093 for (const SDValue &Op : TF->op_values()) {
2094 switch (Op.getOpcode()) {
2095 case ISD::EntryToken:
2096 // Entry tokens don't need to be added to the list. They are
2097 // redundant.
2098 Changed = true;
2099 break;
2100
2101 case ISD::TokenFactor:
2102 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2103 // Queue up for processing.
2104 TFs.push_back(Op.getNode());
2105 Changed = true;
2106 break;
2107 }
2108 [[fallthrough]];
2109
2110 default:
2111 // Only add if it isn't already in the list.
2112 if (SeenOps.insert(Op.getNode()).second)
2113 Ops.push_back(Op);
2114 else
2115 Changed = true;
2116 break;
2117 }
2118 }
2119 }
2120
2121 // Re-visit inlined Token Factors, to clean them up in case they have been
2122 // removed. Skip the first Token Factor, as this is the current node.
2123 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2124 AddToWorklist(TFs[i]);
2125
2126 // Remove nodes that are chained to another node in the list. Do so
2127 // by walking up chains breadth-first, stopping when we've seen
2128 // another operand. In general we must climb to the EntryNode, but we can exit
2129 // early if we find all remaining work is associated with just one operand as
2130 // no further pruning is possible.
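  // For example, if Ops contains both a store and a node further up that
  // store's chain, walking up from the store reaches the other node, so the
  // other node is redundant and can be dropped from the new TokenFactor.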
2131
2132 // List of nodes to search through and original Ops from which they originate.
2133 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2134 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2135 SmallPtrSet<SDNode *, 16> SeenChains;
2136 bool DidPruneOps = false;
2137
2138 unsigned NumLeftToConsider = 0;
2139 for (const SDValue &Op : Ops) {
2140 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2141 OpWorkCount.push_back(1);
2142 }
2143
2144 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2145 // If this is an Op, we can remove the op from the list. Re-mark any
2146 // search associated with it as coming from the current OpNumber.
2147 if (SeenOps.contains(Op)) {
2148 Changed = true;
2149 DidPruneOps = true;
2150 unsigned OrigOpNumber = 0;
2151 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2152 OrigOpNumber++;
2153 assert((OrigOpNumber != Ops.size()) &&
2154 "expected to find TokenFactor Operand");
2155 // Re-mark worklist from OrigOpNumber to OpNumber
2156 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2157 if (Worklist[i].second == OrigOpNumber) {
2158 Worklist[i].second = OpNumber;
2159 }
2160 }
2161 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2162 OpWorkCount[OrigOpNumber] = 0;
2163 NumLeftToConsider--;
2164 }
2165 // Add if it's a new chain
2166 if (SeenChains.insert(Op).second) {
2167 OpWorkCount[OpNumber]++;
2168 Worklist.push_back(std::make_pair(Op, OpNumber));
2169 }
2170 };
2171
2172 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2173 // We need to consider at least 2 Ops to prune.
2174 if (NumLeftToConsider <= 1)
2175 break;
2176 auto CurNode = Worklist[i].first;
2177 auto CurOpNumber = Worklist[i].second;
2178 assert((OpWorkCount[CurOpNumber] > 0) &&
2179 "Node should not appear in worklist");
2180 switch (CurNode->getOpcode()) {
2181 case ISD::EntryToken:
2182 // Hitting EntryToken is the only way for the search to terminate without
2183 // hitting another operand's search. Prevent us from marking this operand
2184 // considered.
2185
2186 NumLeftToConsider++;
2187 break;
2188 case ISD::TokenFactor:
2189 for (const SDValue &Op : CurNode->op_values())
2190 AddToWorklist(i, Op.getNode(), CurOpNumber);
2191 break;
2193 case ISD::LIFETIME_END:
2194 case ISD::CopyFromReg:
2195 case ISD::CopyToReg:
2196 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2197 break;
2198 default:
2199 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2200 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2201 break;
2202 }
2203 OpWorkCount[CurOpNumber]--;
2204 if (OpWorkCount[CurOpNumber] == 0)
2205 NumLeftToConsider--;
2206 }
2207
2208 // If we've changed things around then replace token factor.
2209 if (Changed) {
2210 SDValue Result;
2211 if (Ops.empty()) {
2212 // The entry token is the only possible outcome.
2213 Result = DAG.getEntryNode();
2214 } else {
2215 if (DidPruneOps) {
2216 SmallVector<SDValue, 8> PrunedOps;
2217 //
2218 for (const SDValue &Op : Ops) {
2219 if (SeenChains.count(Op.getNode()) == 0)
2220 PrunedOps.push_back(Op);
2221 }
2222 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2223 } else {
2224 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2225 }
2226 }
2227 return Result;
2228 }
2229 return SDValue();
2230}
2231
2232/// MERGE_VALUES can always be eliminated.
2233SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2234 WorklistRemover DeadNodes(*this);
2235 // Replacing results may cause a different MERGE_VALUES to suddenly
2236 // be CSE'd with N, and carry its uses with it. Iterate until no
2237 // uses remain, to ensure that the node can be safely deleted.
2238 // First add the users of this node to the work list so that they
2239 // can be tried again once they have new operands.
2240 AddUsersToWorklist(N);
2241 do {
2242 // Do as a single replacement to avoid rewalking use lists.
2243 SmallVector<SDValue, 8> Ops;
2244 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2245 Ops.push_back(N->getOperand(i));
2246 DAG.ReplaceAllUsesWith(N, Ops.data());
2247 } while (!N->use_empty());
2248 deleteAndRecombine(N);
2249 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2250}
2251
2252/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
2253/// ConstantSDNode pointer; otherwise return nullptr.
2255 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2256 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2257}
2258
2259// isTruncateOf - If N is a truncate of some other value, return true and record
2260// the value being truncated in Op and which of Op's bits are zero/one in Known.
2261// This function computes KnownBits to avoid a duplicated call to
2262// computeKnownBits in the caller.
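// It also treats (setcc ne X, 0) with an i1 result as a truncate to i1 when
// every bit of X other than bit 0 is known zero, since the compare then just
// reads bit 0 of X.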
2263static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2264 KnownBits &Known) {
2265 if (N->getOpcode() == ISD::TRUNCATE) {
2266 Op = N->getOperand(0);
2267 Known = DAG.computeKnownBits(Op);
2268 return true;
2269 }
2270
2271 if (N.getOpcode() != ISD::SETCC ||
2272 N.getValueType().getScalarType() != MVT::i1 ||
2273 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2274 return false;
2275
2276 SDValue Op0 = N->getOperand(0);
2277 SDValue Op1 = N->getOperand(1);
2278 assert(Op0.getValueType() == Op1.getValueType());
2279
2280 if (isNullOrNullSplat(Op0))
2281 Op = Op1;
2282 else if (isNullOrNullSplat(Op1))
2283 Op = Op0;
2284 else
2285 return false;
2286
2287 Known = DAG.computeKnownBits(Op);
2288
2289 return (Known.Zero | 1).isAllOnes();
2290}
2291
2292/// Return true if 'Use' is a load or a store that uses N as its base pointer
2293/// and that N may be folded in the load / store addressing mode.
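/// For example, if N is (add BasePtr, 16) and the target supports a
/// [reg + imm] addressing mode for the access type, the add can be folded
/// into the load or store instead of being computed separately.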
2294static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2295 const TargetLowering &TLI) {
2296 EVT VT;
2297 unsigned AS;
2298
2299 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2300 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2301 return false;
2302 VT = LD->getMemoryVT();
2303 AS = LD->getAddressSpace();
2304 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2305 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2306 return false;
2307 VT = ST->getMemoryVT();
2308 AS = ST->getAddressSpace();
2309 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2310 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2311 return false;
2312 VT = LD->getMemoryVT();
2313 AS = LD->getAddressSpace();
2314 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2315 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2316 return false;
2317 VT = ST->getMemoryVT();
2318 AS = ST->getAddressSpace();
2319 } else {
2320 return false;
2321 }
2322
2323 TargetLowering::AddrMode AM;
2324 if (N->getOpcode() == ISD::ADD) {
2325 AM.HasBaseReg = true;
2326 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2327 if (Offset)
2328 // [reg +/- imm]
2329 AM.BaseOffs = Offset->getSExtValue();
2330 else
2331 // [reg +/- reg]
2332 AM.Scale = 1;
2333 } else if (N->getOpcode() == ISD::SUB) {
2334 AM.HasBaseReg = true;
2335 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2336 if (Offset)
2337 // [reg +/- imm]
2338 AM.BaseOffs = -Offset->getSExtValue();
2339 else
2340 // [reg +/- reg]
2341 AM.Scale = 1;
2342 } else {
2343 return false;
2344 }
2345
2346 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2347 VT.getTypeForEVT(*DAG.getContext()), AS);
2348}
2349
2350/// This inverts a canonicalization in IR that replaces a variable select arm
2351/// with an identity constant. Codegen improves if we re-use the variable
2352/// operand rather than load a constant. This can also be converted into a
2353/// masked vector operation if the target supports it.
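/// For example, (add X, (vselect Cond, 0, Y)) becomes
/// (vselect Cond, freeze(X), (add freeze(X), Y)), re-using X instead of
/// materializing the identity constant 0.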
2354static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2355 bool ShouldCommuteOperands) {
2356 // Match a select as operand 1. The identity constant that we are looking for
2357 // is only valid as operand 1 of a non-commutative binop.
2358 SDValue N0 = N->getOperand(0);
2359 SDValue N1 = N->getOperand(1);
2360 if (ShouldCommuteOperands)
2361 std::swap(N0, N1);
2362
2363 // TODO: Should this apply to scalar select too?
2364 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2365 return SDValue();
2366
2367 // We can't hoist all instructions because of immediate UB (not speculatable).
2368 // For example div/rem by zero.
2370 return SDValue();
2371
2372 unsigned Opcode = N->getOpcode();
2373 EVT VT = N->getValueType(0);
2374 SDValue Cond = N1.getOperand(0);
2375 SDValue TVal = N1.getOperand(1);
2376 SDValue FVal = N1.getOperand(2);
2377
2378 // This transform increases uses of N0, so freeze it to be safe.
2379 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2380 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2381 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2382 SDValue F0 = DAG.getFreeze(N0);
2383 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2384 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2385 }
2386 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2387 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2388 SDValue F0 = DAG.getFreeze(N0);
2389 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2390 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2391 }
2392
2393 return SDValue();
2394}
2395
2396SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2397 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2398 "Unexpected binary operator");
2399
2400 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2401 auto BinOpcode = BO->getOpcode();
2402 EVT VT = BO->getValueType(0);
2403 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2404 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2405 return Sel;
2406
2407 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2408 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2409 return Sel;
2410 }
2411
2412 // Don't do this unless the old select is going away. We want to eliminate the
2413 // binary operator, not replace a binop with a select.
2414 // TODO: Handle ISD::SELECT_CC.
2415 unsigned SelOpNo = 0;
2416 SDValue Sel = BO->getOperand(0);
2417 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2418 SelOpNo = 1;
2419 Sel = BO->getOperand(1);
2420
2421 // Peek through trunc to shift amount type.
2422 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2423 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2424 // This is valid when the truncated bits of x are already zero.
2425 SDValue Op;
2426 KnownBits Known;
2427 if (isTruncateOf(DAG, Sel, Op, Known) &&
2429 Sel = Op;
2430 }
2431 }
2432
2433 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2434 return SDValue();
2435
2436 SDValue CT = Sel.getOperand(1);
2437 if (!isConstantOrConstantVector(CT, true) &&
2439 return SDValue();
2440
2441 SDValue CF = Sel.getOperand(2);
2442 if (!isConstantOrConstantVector(CF, true) &&
2444 return SDValue();
2445
2446 // Bail out if any constants are opaque because we can't constant fold those.
2447 // The exception is "and" and "or" with either 0 or -1, in which case we can
2448 // propagate non-constant operands into the select. I.e.:
2449 // and (select Cond, 0, -1), X --> select Cond, 0, X
2450 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2451 bool CanFoldNonConst =
2452 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2455
2456 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2457 if (!CanFoldNonConst &&
2458 !isConstantOrConstantVector(CBO, true) &&
2460 return SDValue();
2461
2462 SDLoc DL(Sel);
2463 SDValue NewCT, NewCF;
2464
2465 if (CanFoldNonConst) {
2466 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2467 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2468 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2469 NewCT = CT;
2470 else
2471 NewCT = CBO;
2472
2473 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2474 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2475 NewCF = CF;
2476 else
2477 NewCF = CBO;
2478 } else {
2479 // We have a select-of-constants followed by a binary operator with a
2480 // constant. Eliminate the binop by pulling the constant math into the
2481 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2482 // CBO, CF + CBO
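    // With concrete values: add (select Cond, 3, 7), 5 --> select Cond, 8, 12.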
2483 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2484 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2485 if (!NewCT)
2486 return SDValue();
2487
2488 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2489 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2490 if (!NewCF)
2491 return SDValue();
2492 }
2493
2494 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2495 SelectOp->setFlags(BO->getFlags());
2496 return SelectOp;
2497}
2498
2499static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2500 SelectionDAG &DAG) {
2501 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2502 "Expecting add or sub");
2503
2504 // Match a constant operand and a zext operand for the math instruction:
2505 // add Z, C
2506 // sub C, Z
2507 bool IsAdd = N->getOpcode() == ISD::ADD;
2508 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2509 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2510 auto *CN = dyn_cast<ConstantSDNode>(C);
2511 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2512 return SDValue();
2513
2514 // Match the zext operand as a setcc of a boolean.
2515 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2516 Z.getOperand(0).getValueType() != MVT::i1)
2517 return SDValue();
2518
2519 // Match the compare as: setcc (X & 1), 0, eq.
2520 SDValue SetCC = Z.getOperand(0);
2521 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2522 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2523 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2524 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2525 return SDValue();
2526
2527 // We are adding/subtracting a constant and an inverted low bit. Turn that
2528 // into a subtract/add of the low bit with incremented/decremented constant:
2529 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2530 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
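  // Worked example with C == 41: add (zext i1 (seteq (X & 1), 0)), 41 becomes
  // sub 42, (zext (X & 1)), which is 42 when X is even and 41 when X is odd,
  // matching the original expression.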
2531 EVT VT = C.getValueType();
2532 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2533 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2534 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2535 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2536}
2537
2538// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
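// This relies on the identity (A | B) == (A & B) + (A ^ B), so
// (A | B) - ((A ^ B) >> 1) == (A & B) + ceil((A ^ B) / 2), i.e. the
// rounded-up average computed without overflowing the element type.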
2539SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2540 SDValue N0 = N->getOperand(0);
2541 EVT VT = N0.getValueType();
2542 SDValue A, B;
2543
2544 if (hasOperation(ISD::AVGCEILU, VT) &&
2547 m_SpecificInt(1))))) {
2548 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2549 }
2550 if (hasOperation(ISD::AVGCEILS, VT) &&
2553 m_SpecificInt(1))))) {
2554 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2555 }
2556 return SDValue();
2557}
2558
2559/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2560/// a shift and add with a different constant.
2561static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2562 SelectionDAG &DAG) {
2563 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2564 "Expecting add or sub");
2565
2566 // We need a constant operand for the add/sub, and the other operand is a
2567 // logical shift right: add (srl), C or sub C, (srl).
2568 bool IsAdd = N->getOpcode() == ISD::ADD;
2569 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2570 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2571 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2572 ShiftOp.getOpcode() != ISD::SRL)
2573 return SDValue();
2574
2575 // The shift must be of a 'not' value.
2576 SDValue Not = ShiftOp.getOperand(0);
2577 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2578 return SDValue();
2579
2580 // The shift must be moving the sign bit to the least-significant-bit.
2581 EVT VT = ShiftOp.getValueType();
2582 SDValue ShAmt = ShiftOp.getOperand(1);
2583 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2584 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2585 return SDValue();
2586
2587 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2588 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2589 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
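  // Worked example for the first fold (i32): when X >= 0, (srl (not X), 31)
  // is 1 and (sra X, 31) is 0, so both sides equal C + 1; when X < 0 they are
  // 0 and -1 respectively, and both sides again equal C.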
2590 if (SDValue NewC = DAG.FoldConstantArithmetic(
2591 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2592 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2593 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2594 Not.getOperand(0), ShAmt);
2595 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2596 }
2597
2598 return SDValue();
2599}
2600
2601static bool
2602areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2603 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2604 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2605}
2606
2607/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2608/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2609/// are no common bits set in the operands).
2610SDValue DAGCombiner::visitADDLike(SDNode *N) {
2611 SDValue N0 = N->getOperand(0);
2612 SDValue N1 = N->getOperand(1);
2613 EVT VT = N0.getValueType();
2614 SDLoc DL(N);
2615
2616 // fold (add x, undef) -> undef
2617 if (N0.isUndef())
2618 return N0;
2619 if (N1.isUndef())
2620 return N1;
2621
2622 // fold (add c1, c2) -> c1+c2
2623 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2624 return C;
2625
2626 // canonicalize constant to RHS
2629 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2630
2631 if (areBitwiseNotOfEachother(N0, N1))
2633 SDLoc(N), VT);
2634
2635 // fold vector ops
2636 if (VT.isVector()) {
2637 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2638 return FoldedVOp;
2639
2640 // fold (add x, 0) -> x, vector edition
2642 return N0;
2643 }
2644
2645 // fold (add x, 0) -> x
2646 if (isNullConstant(N1))
2647 return N0;
2648
2649 if (N0.getOpcode() == ISD::SUB) {
2650 SDValue N00 = N0.getOperand(0);
2651 SDValue N01 = N0.getOperand(1);
2652
2653 // fold ((A-c1)+c2) -> (A+(c2-c1))
2654 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2655 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2656
2657 // fold ((c1-A)+c2) -> (c1+c2)-A
2658 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2659 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2660 }
2661
2662 // add (sext i1 X), 1 -> zext (not i1 X)
2663 // We don't transform this pattern:
2664 // add (zext i1 X), -1 -> sext (not i1 X)
2665 // because most (?) targets generate better code for the zext form.
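  // For the first fold: if X is true, (sext X) + 1 == -1 + 1 == 0 == zext (not X);
  // if X is false, it is 0 + 1 == 1 == zext (not X), so the two forms agree.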
2666 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2667 isOneOrOneSplat(N1)) {
2668 SDValue X = N0.getOperand(0);
2669 if ((!LegalOperations ||
2670 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2672 X.getScalarValueSizeInBits() == 1) {
2673 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2674 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2675 }
2676 }
2677
2678 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2679 // iff (or x, c0) is equivalent to (add x, c0).
2680 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2681 // iff (xor x, c0) is equivalent to (add x, c0).
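  // For example, if x is known to have its low three bits clear, (or x, 5)
  // equals (add x, 5), so (add (or x, 5), 16) can become (add x, 21).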
2682 if (DAG.isADDLike(N0)) {
2683 SDValue N01 = N0.getOperand(1);
2684 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2685 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2686 }
2687
2688 if (SDValue NewSel = foldBinOpIntoSelect(N))
2689 return NewSel;
2690
2691 // reassociate add
2692 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2693 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2694 return RADD;
2695
2696 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2697 // equivalent to (add x, c).
2698 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2699 // equivalent to (add x, c).
2700 // Do this optimization only when adding c does not introduce instructions
2701 // for adding carries.
2702 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2703 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2704 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2705 // If N0's type does not split or is a sign mask, it does not introduce
2706 // add carry.
2707 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2708 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2711 if (NoAddCarry)
2712 return DAG.getNode(
2713 ISD::ADD, DL, VT,
2714 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2715 N0.getOperand(1));
2716 }
2717 return SDValue();
2718 };
2719 if (SDValue Add = ReassociateAddOr(N0, N1))
2720 return Add;
2721 if (SDValue Add = ReassociateAddOr(N1, N0))
2722 return Add;
2723
2724 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2725 if (SDValue SD =
2726 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2727 return SD;
2728 }
2729
2730 SDValue A, B, C;
2731
2732 // fold ((0-A) + B) -> B-A
2733 if (sd_match(N0, m_Neg(m_Value(A))))
2734 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2735
2736 // fold (A + (0-B)) -> A-B
2737 if (sd_match(N1, m_Neg(m_Value(B))))
2738 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2739
2740 // fold (A+(B-A)) -> B
2741 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2742 return B;
2743
2744 // fold ((B-A)+A) -> B
2745 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2746 return B;
2747
2748 // fold ((A-B)+(C-A)) -> (C-B)
2749 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2751 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2752
2753 // fold ((A-B)+(B-C)) -> (A-C)
2754 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2756 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2757
2758 // fold (A+(B-(A+C))) to (B-C)
2759 // fold (A+(B-(C+A))) to (B-C)
2760 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2761 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2762
2763 // fold (A+((B-A)+or-C)) to (B+or-C)
2764 if (sd_match(N1,
2766 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2767 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2768
2769 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2770 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2771 N0->hasOneUse() && N1->hasOneUse()) {
2772 SDValue N00 = N0.getOperand(0);
2773 SDValue N01 = N0.getOperand(1);
2774 SDValue N10 = N1.getOperand(0);
2775 SDValue N11 = N1.getOperand(1);
2776
2778 return DAG.getNode(ISD::SUB, DL, VT,
2779 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2780 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2781 }
2782
2783 // fold (add (umax X, C), -C) --> (usubsat X, C)
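  // umax(X, C) - C is 0 when X <= C and X - C otherwise, which is exactly
  // usubsat(X, C); adding the constant -C performs that subtraction.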
2784 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2785 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2786 return (!Max && !Op) ||
2787 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2788 };
2789 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2790 /*AllowUndefs*/ true))
2791 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2792 N0.getOperand(1));
2793 }
2794
2796 return SDValue(N, 0);
2797
2798 if (isOneOrOneSplat(N1)) {
2799 // fold (add (xor a, -1), 1) -> (sub 0, a)
2800 if (isBitwiseNot(N0))
2801 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2802 N0.getOperand(0));
2803
2804 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2805 if (N0.getOpcode() == ISD::ADD) {
2806 SDValue A, Xor;
2807
2808 if (isBitwiseNot(N0.getOperand(0))) {
2809 A = N0.getOperand(1);
2810 Xor = N0.getOperand(0);
2811 } else if (isBitwiseNot(N0.getOperand(1))) {
2812 A = N0.getOperand(0);
2813 Xor = N0.getOperand(1);
2814 }
2815
2816 if (Xor)
2817 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2818 }
2819
2820 // Look for:
2821 // add (add x, y), 1
2822 // And if the target does not like this form then turn into:
2823 // sub y, (xor x, -1)
2824 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2825 N0.hasOneUse() &&
2826 // Limit this to after legalization if the add has wrap flags
2827 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2828 !N->getFlags().hasNoSignedWrap()))) {
2829 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2830 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2831 }
2832 }
2833
2834 // (x - y) + -1 -> add (xor y, -1), x
2835 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2836 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2837 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2838 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2839 }
2840
2841 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2842 return Combined;
2843
2844 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2845 return Combined;
2846
2847 return SDValue();
2848}
2849
2850// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
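// This relies on the identity A + B == 2*(A & B) + (A ^ B), so
// (A & B) + ((A ^ B) >> 1) == floor((A + B) / 2), i.e. the rounded-down
// average computed without overflowing the element type.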
2851SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2852 SDValue N0 = N->getOperand(0);
2853 EVT VT = N0.getValueType();
2854 SDValue A, B;
2855
2856 if (hasOperation(ISD::AVGFLOORU, VT) &&
2859 m_SpecificInt(1))))) {
2860 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2861 }
2862 if (hasOperation(ISD::AVGFLOORS, VT) &&
2865 m_SpecificInt(1))))) {
2866 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2867 }
2868
2869 return SDValue();
2870}
2871
2872SDValue DAGCombiner::visitADD(SDNode *N) {
2873 SDValue N0 = N->getOperand(0);
2874 SDValue N1 = N->getOperand(1);
2875 EVT VT = N0.getValueType();
2876 SDLoc DL(N);
2877
2878 if (SDValue Combined = visitADDLike(N))
2879 return Combined;
2880
2881 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2882 return V;
2883
2884 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2885 return V;
2886
2887 // Try to match AVGFLOOR fixedwidth pattern
2888 if (SDValue V = foldAddToAvg(N, DL))
2889 return V;
2890
2891 // fold (a+b) -> (a|b) iff a and b share no bits.
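  // With no bit position set in both operands the addition produces no
  // carries, so it is exactly a bitwise OR; the disjoint flag records this.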
2892 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2893 DAG.haveNoCommonBitsSet(N0, N1)) {
2895 Flags.setDisjoint(true);
2896 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2897 }
2898
2899 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2900 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2901 const APInt &C0 = N0->getConstantOperandAPInt(0);
2902 const APInt &C1 = N1->getConstantOperandAPInt(0);
2903 return DAG.getVScale(DL, VT, C0 + C1);
2904 }
2905
2906 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2907 if (N0.getOpcode() == ISD::ADD &&
2908 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2909 N1.getOpcode() == ISD::VSCALE) {
2910 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2911 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2912 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2913 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2914 }
2915
2916 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2917 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2918 N1.getOpcode() == ISD::STEP_VECTOR) {
2919 const APInt &C0 = N0->getConstantOperandAPInt(0);
2920 const APInt &C1 = N1->getConstantOperandAPInt(0);
2921 APInt NewStep = C0 + C1;
2922 return DAG.getStepVector(DL, VT, NewStep);
2923 }
2924
2925 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2926 if (N0.getOpcode() == ISD::ADD &&
2928 N1.getOpcode() == ISD::STEP_VECTOR) {
2929 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2930 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2931 APInt NewStep = SV0 + SV1;
2932 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2933 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2934 }
2935
2936 return SDValue();
2937}
2938
2939SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2940 unsigned Opcode = N->getOpcode();
2941 SDValue N0 = N->getOperand(0);
2942 SDValue N1 = N->getOperand(1);
2943 EVT VT = N0.getValueType();
2944 bool IsSigned = Opcode == ISD::SADDSAT;
2945 SDLoc DL(N);
2946
2947 // fold (add_sat x, undef) -> -1
2948 if (N0.isUndef() || N1.isUndef())
2949 return DAG.getAllOnesConstant(DL, VT);
2950
2951 // fold (add_sat c1, c2) -> c3
2952 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2953 return C;
2954
2955 // canonicalize constant to RHS
2958 return DAG.getNode(Opcode, DL, VT, N1, N0);
2959
2960 // fold vector ops
2961 if (VT.isVector()) {
2962 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2963 return FoldedVOp;
2964
2965 // fold (add_sat x, 0) -> x, vector edition
2967 return N0;
2968 }
2969
2970 // fold (add_sat x, 0) -> x
2971 if (isNullConstant(N1))
2972 return N0;
2973
2974 // If it cannot overflow, transform into an add.
2975 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
2976 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2977
2978 return SDValue();
2979}
2980
2981static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
2982 bool ForceCarryReconstruction = false) {
2983 bool Masked = false;
2984
2985 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2986 while (true) {
2987 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2988 V = V.getOperand(0);
2989 continue;
2990 }
2991
2992 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2993 if (ForceCarryReconstruction)
2994 return V;
2995
2996 Masked = true;
2997 V = V.getOperand(0);
2998 continue;
2999 }
3000
3001 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3002 return V;
3003
3004 break;
3005 }
3006
3007 // If this is not a carry, return.
3008 if (V.getResNo() != 1)
3009 return SDValue();
3010
3011 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3012 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3013 return SDValue();
3014
3015 EVT VT = V->getValueType(0);
3016 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3017 return SDValue();
3018
3019 // If the result is masked, then no matter what kind of bool it is we can
3020 // return. If it isn't, then we need to make sure the bool type is either 0 or
3021 // 1 and not other values.
3022 if (Masked ||
3023 TLI.getBooleanContents(V.getValueType()) ==
3025 return V;
3026
3027 return SDValue();
3028}
3029
3030/// Given the operands of an add/sub operation, see if the 2nd operand is a
3031/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3032/// the opcode and bypass the mask operation.
3033static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3034 SelectionDAG &DAG, const SDLoc &DL) {
3035 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3036 N1 = N1.getOperand(0);
3037
3038 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3039 return SDValue();
3040
3041 EVT VT = N0.getValueType();
3042 SDValue N10 = N1.getOperand(0);
3043 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3044 N10 = N10.getOperand(0);
3045
3046 if (N10.getValueType() != VT)
3047 return SDValue();
3048
3049 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3050 return SDValue();
3051
3052 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3053 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
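  // X here is known to be all sign bits (0 or -1), so (and X, 1) is 0 or 1,
  // i.e. equal to -X; for example N0 + (and -1, 1) == N0 + 1 == N0 - (-1).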
3054 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3055}
3056
3057/// Helper for doing combines based on N0 and N1 being added to each other.
3058SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3059 SDNode *LocReference) {
3060 EVT VT = N0.getValueType();
3061 SDLoc DL(LocReference);
3062
3063 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3064 SDValue Y, N;
3065 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3066 return DAG.getNode(ISD::SUB, DL, VT, N0,
3067 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3068
3069 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3070 return V;
3071
3072 // Look for:
3073 // add (add x, 1), y
3074 // And if the target does not like this form then turn into:
3075 // sub y, (xor x, -1)
3076 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3077 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3078 // Limit this to after legalization if the add has wrap flags
3079 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3080 !N0->getFlags().hasNoSignedWrap()))) {
3081 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3082 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3083 }
3084
3085 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3086 // Hoist one-use subtraction by non-opaque constant:
3087 // (x - C) + y -> (x + y) - C
3088 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3089 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3090 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3091 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3092 }
3093 // Hoist one-use subtraction from non-opaque constant:
3094 // (C - x) + y -> (y - x) + C
3095 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3096 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3097 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3098 }
3099 }
3100
3101 // add (mul x, C), x -> mul x, C+1
3102 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3103 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3104 N0.hasOneUse()) {
3105 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3106 DAG.getConstant(1, DL, VT));
3107 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3108 }
3109
3110 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3111 // rather than 'add 0/-1' (the zext should get folded).
3112 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3113 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3114 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3116 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3117 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3118 }
3119
3120 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3121 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3122 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3123 if (TN->getVT() == MVT::i1) {
3124 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3125 DAG.getConstant(1, DL, VT));
3126 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3127 }
3128 }
3129
3130 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3131 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3132 N1.getResNo() == 0)
3133 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3134 N0, N1.getOperand(0), N1.getOperand(2));
3135
3136 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3138 if (SDValue Carry = getAsCarry(TLI, N1))
3139 return DAG.getNode(ISD::UADDO_CARRY, DL,
3140 DAG.getVTList(VT, Carry.getValueType()), N0,
3141 DAG.getConstant(0, DL, VT), Carry);
3142
3143 return SDValue();
3144}
3145
3146SDValue DAGCombiner::visitADDC(SDNode *N) {
3147 SDValue N0 = N->getOperand(0);
3148 SDValue N1 = N->getOperand(1);
3149 EVT VT = N0.getValueType();
3150 SDLoc DL(N);
3151
3152 // If the flag result is dead, turn this into an ADD.
3153 if (!N->hasAnyUseOfValue(1))
3154 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3155 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3156
3157 // canonicalize constant to RHS.
3158 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3159 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3160 if (N0C && !N1C)
3161 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3162
3163 // fold (addc x, 0) -> x + no carry out
3164 if (isNullConstant(N1))
3165 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3166 DL, MVT::Glue));
3167
3168   // If it cannot overflow, transform into an add.
3169   if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3170 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3171 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3172
3173 return SDValue();
3174}
3175
3176/**
3177  * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3178 * then the flip also occurs if computing the inverse is the same cost.
3179 * This function returns an empty SDValue in case it cannot flip the boolean
3180 * without increasing the cost of the computation. If you want to flip a boolean
3181 * no matter what, use DAG.getLogicalNOT.
3182  */
3183 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3184 const TargetLowering &TLI,
3185 bool Force) {
3186 if (Force && isa<ConstantSDNode>(V))
3187 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3188
3189 if (V.getOpcode() != ISD::XOR)
3190 return SDValue();
3191
3192 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3193 if (!Const)
3194 return SDValue();
3195
3196 EVT VT = V.getValueType();
3197
3198 bool IsFlip = false;
3199   switch(TLI.getBooleanContents(VT)) {
3200     case TargetLowering::ZeroOrOneBooleanContent:
3201       IsFlip = Const->isOne();
3202       break;
3203     case TargetLowering::ZeroOrNegativeOneBooleanContent:
3204       IsFlip = Const->isAllOnes();
3205       break;
3206     case TargetLowering::UndefinedBooleanContent:
3207       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3208       break;
3209 }
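  // For example, with ZeroOrOneBooleanContent a boolean V = (xor x, 1) is the
  // inverse of x, so IsFlip is set and the unflipped value x is returned
  // below; with ZeroOrNegativeOneBooleanContent the same holds for
  // V = (xor x, -1).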
3210
3211 if (IsFlip)
3212 return V.getOperand(0);
3213 if (Force)
3214 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3215 return SDValue();
3216}
3217
3218SDValue DAGCombiner::visitADDO(SDNode *N) {
3219 SDValue N0 = N->getOperand(0);
3220 SDValue N1 = N->getOperand(1);
3221 EVT VT = N0.getValueType();
3222 bool IsSigned = (ISD::SADDO == N->getOpcode());
3223
3224 EVT CarryVT = N->getValueType(1);
3225 SDLoc DL(N);
3226
3227 // If the flag result is dead, turn this into an ADD.
3228 if (!N->hasAnyUseOfValue(1))
3229 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3230 DAG.getUNDEF(CarryVT));
3231
3232   // canonicalize constant to RHS.
3233   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3234       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3235 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3236
3237 // fold (addo x, 0) -> x + no carry out
3238 if (isNullOrNullSplat(N1))
3239 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3240
3241 // If it cannot overflow, transform into an add.
3242 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3243 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3244 DAG.getConstant(0, DL, CarryVT));
3245
3246 if (IsSigned) {
3247 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3248 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3249 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3250 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3251 } else {
3252 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3253 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3254 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3255 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3256 return CombineTo(
3257 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3258 }
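      // For example, for i8 a = 5: (xor 5, -1) = 250 and (uaddo 250, 1) = 251
      // with carry 0, while (usubo 0, 5) = 251 with borrow 1; flipping the
      // borrow reproduces the original carry, so the replacement is exact.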
3259
3260 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3261 return Combined;
3262
3263 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3264 return Combined;
3265 }
3266
3267 return SDValue();
3268}
3269
3270SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3271 EVT VT = N0.getValueType();
3272 if (VT.isVector())
3273 return SDValue();
3274
3275 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3276 // If Y + 1 cannot overflow.
3277 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3278 SDValue Y = N1.getOperand(0);
3279     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3280     if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3281 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3282 N1.getOperand(2));
3283 }
3284
3285   // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3286   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3287 if (SDValue Carry = getAsCarry(TLI, N1))
3288 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3289 DAG.getConstant(0, SDLoc(N), VT), Carry);
3290
3291 return SDValue();
3292}
3293
3294SDValue DAGCombiner::visitADDE(SDNode *N) {
3295 SDValue N0 = N->getOperand(0);
3296 SDValue N1 = N->getOperand(1);
3297 SDValue CarryIn = N->getOperand(2);
3298
3299 // canonicalize constant to RHS
3300 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3301 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3302 if (N0C && !N1C)
3303 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3304 N1, N0, CarryIn);
3305
3306 // fold (adde x, y, false) -> (addc x, y)
3307 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3308 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3309
3310 return SDValue();
3311}
3312
3313SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3314 SDValue N0 = N->getOperand(0);
3315 SDValue N1 = N->getOperand(1);
3316 SDValue CarryIn = N->getOperand(2);
3317 SDLoc DL(N);
3318
3319 // canonicalize constant to RHS
3320 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3321 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3322 if (N0C && !N1C)
3323 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3324
3325 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3326 if (isNullConstant(CarryIn)) {
3327 if (!LegalOperations ||
3328 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3329 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3330 }
3331
3332 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3333 if (isNullConstant(N0) && isNullConstant(N1)) {
3334 EVT VT = N0.getValueType();
3335 EVT CarryVT = CarryIn.getValueType();
3336 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3337 AddToWorklist(CarryExt.getNode());
3338 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3339 DAG.getConstant(1, DL, VT)),
3340 DAG.getConstant(0, DL, CarryVT));
3341 }
3342
3343 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3344 return Combined;
3345
3346 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3347 return Combined;
3348
3349 // We want to avoid useless duplication.
3350 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3351   // not a binary operation, it is not really possible to leverage this
3352   // existing mechanism for it. However, if more operations require the same
3353   // deduplication logic, then it may be worth generalizing.
3354 SDValue Ops[] = {N1, N0, CarryIn};
3355 SDNode *CSENode =
3356 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3357 if (CSENode)
3358 return SDValue(CSENode, 0);
3359
3360 return SDValue();
3361}
3362
3363/**
3364  * If we are facing some sort of diamond carry propagation pattern, try to
3365 * break it up to generate something like:
3366 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3367 *
3368  * The end result is usually an increase in the number of operations required, but because the
3369 * carry is now linearized, other transforms can kick in and optimize the DAG.
3370 *
3371 * Patterns typically look something like
3372 * (uaddo A, B)
3373 * / \
3374 * Carry Sum
3375 * | \
3376 * | (uaddo_carry *, 0, Z)
3377 * | /
3378 * \ Carry
3379 * | /
3380 * (uaddo_carry X, *, *)
3381 *
3382  * But numerous variations exist. Our goal is to identify A, B, X and Z and
3383 * produce a combine with a single path for carry propagation.
3384  */
3385 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3386 SelectionDAG &DAG, SDValue X,
3387 SDValue Carry0, SDValue Carry1,
3388 SDNode *N) {
3389 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3390 return SDValue();
3391 if (Carry1.getOpcode() != ISD::UADDO)
3392 return SDValue();
3393
3394 SDValue Z;
3395
3396 /**
3397 * First look for a suitable Z. It will present itself in the form of
3398 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3399 */
3400 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3401 isNullConstant(Carry0.getOperand(1))) {
3402 Z = Carry0.getOperand(2);
3403 } else if (Carry0.getOpcode() == ISD::UADDO &&
3404 isOneConstant(Carry0.getOperand(1))) {
3405 EVT VT = Carry0->getValueType(1);
3406 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3407 } else {
3408 // We couldn't find a suitable Z.
3409 return SDValue();
3410 }
3411
3412
3413 auto cancelDiamond = [&](SDValue A,SDValue B) {
3414 SDLoc DL(N);
3415 SDValue NewY =
3416 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3417 Combiner.AddToWorklist(NewY.getNode());
3418 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3419 DAG.getConstant(0, DL, X.getValueType()),
3420 NewY.getValue(1));
3421 };
3422
3423 /**
3424 * (uaddo A, B)
3425 * |
3426 * Sum
3427 * |
3428 * (uaddo_carry *, 0, Z)
3429 */
3430 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3431 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3432 }
3433
3434 /**
3435 * (uaddo_carry A, 0, Z)
3436 * |
3437 * Sum
3438 * |
3439 * (uaddo *, B)
3440 */
3441 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3442 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3443 }
3444
3445 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3446 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3447 }
3448
3449 return SDValue();
3450}
3451
3452 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3453// match patterns like:
3454//
3455// (uaddo A, B) CarryIn
3456// | \ |
3457// | \ |
3458// PartialSum PartialCarryOutX /
3459// | | /
3460// | ____|____________/
3461// | / |
3462// (uaddo *, *) \________
3463// | \ \
3464// | \ |
3465// | PartialCarryOutY |
3466// | \ |
3467// | \ /
3468// AddCarrySum | ______/
3469// | /
3470// CarryOut = (or *, *)
3471//
3472// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3473//
3474// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3475//
3476// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3477 // with a single path for carry/borrow out propagation.
3478 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3479 SDValue N0, SDValue N1, SDNode *N) {
3480 SDValue Carry0 = getAsCarry(TLI, N0);
3481 if (!Carry0)
3482 return SDValue();
3483 SDValue Carry1 = getAsCarry(TLI, N1);
3484 if (!Carry1)
3485 return SDValue();
3486
3487 unsigned Opcode = Carry0.getOpcode();
3488 if (Opcode != Carry1.getOpcode())
3489 return SDValue();
3490 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3491 return SDValue();
3492 // Guarantee identical type of CarryOut
3493 EVT CarryOutType = N->getValueType(0);
3494 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3495 CarryOutType != Carry1.getValue(1).getValueType())
3496 return SDValue();
3497
3498 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3499 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3500 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3501 std::swap(Carry0, Carry1);
3502
3503 // Check if nodes are connected in expected way.
3504 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3505 Carry1.getOperand(1) != Carry0.getValue(0))
3506 return SDValue();
3507
3508 // The carry in value must be on the righthand side for subtraction.
3509 unsigned CarryInOperandNum =
3510 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3511 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3512 return SDValue();
3513 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3514
3515 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3516 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3517 return SDValue();
3518
3519 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3520 CarryIn = getAsCarry(TLI, CarryIn, true);
3521 if (!CarryIn)
3522 return SDValue();
3523
3524 SDLoc DL(N);
3525 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3526 Carry1->getValueType(0));
3527 SDValue Merged =
3528 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3529 Carry0.getOperand(1), CarryIn);
3530
3531 // Please note that because we have proven that the result of the UADDO/USUBO
3532 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3533 // therefore prove that if the first UADDO/USUBO overflows, the second
3534 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3535 // maximum value.
3536 //
3537 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3538 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3539 //
3540 // This is important because it means that OR and XOR can be used to merge
3541 // carry flags; and that AND can return a constant zero.
3542 //
3543 // TODO: match other operations that can merge flags (ADD, etc)
3544 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3545 if (N->getOpcode() == ISD::AND)
3546 return DAG.getConstant(0, DL, CarryOutType);
3547 return Merged.getValue(1);
3548}
3549
3550SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3551 SDValue CarryIn, SDNode *N) {
3552 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3553 // carry.
3554 if (isBitwiseNot(N0))
3555 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3556 SDLoc DL(N);
3557 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3558 N0.getOperand(0), NotC);
3559 return CombineTo(
3560 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3561 }
3562
3563 // Iff the flag result is dead:
3564 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3565 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3566 // or the dependency between the instructions.
3567 if ((N0.getOpcode() == ISD::ADD ||
3568 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3569 N0.getValue(1) != CarryIn)) &&
3570 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3571 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3572 N0.getOperand(0), N0.getOperand(1), CarryIn);
3573
3574 /**
3575  * When one of the uaddo_carry arguments is itself a carry, we may be facing
3576  * a diamond carry propagation. In that case we try to transform the DAG
3577 * to ensure linear carry propagation if that is possible.
3578 */
3579 if (auto Y = getAsCarry(TLI, N1)) {
3580 // Because both are carries, Y and Z can be swapped.
3581 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3582 return R;
3583 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3584 return R;
3585 }
3586
3587 return SDValue();
3588}
3589
3590SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3591 SDValue CarryIn, SDNode *N) {
3592 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3593 if (isBitwiseNot(N0)) {
3594 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3595 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3596 N0.getOperand(0), NotC);
3597 }
3598
3599 return SDValue();
3600}
3601
3602SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3603 SDValue N0 = N->getOperand(0);
3604 SDValue N1 = N->getOperand(1);
3605 SDValue CarryIn = N->getOperand(2);
3606 SDLoc DL(N);
3607
3608 // canonicalize constant to RHS
3609 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3610 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3611 if (N0C && !N1C)
3612 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3613
3614 // fold (saddo_carry x, y, false) -> (saddo x, y)
3615 if (isNullConstant(CarryIn)) {
3616 if (!LegalOperations ||
3617 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3618 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3619 }
3620
3621 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3622 return Combined;
3623
3624 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3625 return Combined;
3626
3627 return SDValue();
3628}
3629
3630// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3631// clamp/truncation if necessary.
3632static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3633 SDValue RHS, SelectionDAG &DAG,
3634 const SDLoc &DL) {
3635 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3636 "Illegal truncation");
3637
3638 if (DstVT == SrcVT)
3639 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3640
3641 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3642   // clamping RHS.
3643   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3644 DstVT.getScalarSizeInBits());
3645 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3646 return SDValue();
3647
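  // For example, truncating a USUBSAT from i16 to i8 with a zero-extended LHS:
  // LHS = 100, RHS = 300 gives umin(300, 255) = 255 and
  // usubsat.i8(100, 255) = 0, matching trunc(usubsat.i16(100, 300)) = 0; for
  // RHS = 40 both forms give 60, so clamping RHS to the narrow saturation
  // limit preserves the result.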
3648   SDValue SatLimit =
3649       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3650 DstVT.getScalarSizeInBits()),
3651 DL, SrcVT);
3652 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3653 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3654 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3655 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3656}
3657
3658// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3659// usubsat(a,b), optionally as a truncated type.
3660SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3661 if (N->getOpcode() != ISD::SUB ||
3662 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3663 return SDValue();
3664
3665 EVT SubVT = N->getValueType(0);
3666 SDValue Op0 = N->getOperand(0);
3667 SDValue Op1 = N->getOperand(1);
3668
3669 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3670   // that may be converted to usubsat(a,b).
3671 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3672 SDValue MaxLHS = Op0.getOperand(0);
3673 SDValue MaxRHS = Op0.getOperand(1);
3674 if (MaxLHS == Op1)
3675 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3676 if (MaxRHS == Op1)
3677 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3678 }
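  // For example, for i8 a = 10, b = 30: umax(a,b) - b = 30 - 30 = 0 =
  // usubsat(10, 30), and for a = 30, b = 10: 30 - 10 = 20 = usubsat(30, 10);
  // subtracting the matching operand from the umax can never underflow.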
3679
3680 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3681 SDValue MinLHS = Op1.getOperand(0);
3682 SDValue MinRHS = Op1.getOperand(1);
3683 if (MinLHS == Op0)
3684 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3685 if (MinRHS == Op0)
3686 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3687 }
3688
3689 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3690 if (Op1.getOpcode() == ISD::TRUNCATE &&
3691 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3692 Op1.getOperand(0).hasOneUse()) {
3693 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3694 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3695 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3696 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3697 DAG, DL);
3698 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3699 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3700 DAG, DL);
3701 }
3702
3703 return SDValue();
3704}
3705
3706 // Since it may not be valid to emit a fold to zero for vector initializers,
3707// check if we can before folding.
3708static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3709 SelectionDAG &DAG, bool LegalOperations) {
3710 if (!VT.isVector())
3711 return DAG.getConstant(0, DL, VT);
3712 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3713 return DAG.getConstant(0, DL, VT);
3714 return SDValue();
3715}
3716
3717SDValue DAGCombiner::visitSUB(SDNode *N) {
3718 SDValue N0 = N->getOperand(0);
3719 SDValue N1 = N->getOperand(1);
3720 EVT VT = N0.getValueType();
3721 unsigned BitWidth = VT.getScalarSizeInBits();
3722 SDLoc DL(N);
3723
3724 auto PeekThroughFreeze = [](SDValue N) {
3725 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3726 return N->getOperand(0);
3727 return N;
3728 };
3729
3730 // fold (sub x, x) -> 0
3731 // FIXME: Refactor this and xor and other similar operations together.
3732 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3733 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3734
3735 // fold (sub c1, c2) -> c3
3736 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3737 return C;
3738
3739 // fold vector ops
3740 if (VT.isVector()) {
3741 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3742 return FoldedVOp;
3743
3744     // fold (sub x, 0) -> x, vector edition
3745     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3746 return N0;
3747 }
3748
3749 if (SDValue NewSel = foldBinOpIntoSelect(N))
3750 return NewSel;
3751
3752   // fold (sub x, c) -> (add x, -c)
3753   if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3754 return DAG.getNode(ISD::ADD, DL, VT, N0,
3755 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3756
3757 if (isNullOrNullSplat(N0)) {
3758 // Right-shifting everything out but the sign bit followed by negation is
3759 // the same as flipping arithmetic/logical shift type without the negation:
3760 // -(X >>u 31) -> (X >>s 31)
3761 // -(X >>s 31) -> (X >>u 31)
3762     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3763       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3764 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3765 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3766 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3767 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3768 }
3769 }
3770
3771 // 0 - X --> 0 if the sub is NUW.
3772 if (N->getFlags().hasNoUnsignedWrap())
3773 return N0;
3774
3775     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3776 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3777 // N1 must be 0 because negating the minimum signed value is undefined.
3778 if (N->getFlags().hasNoSignedWrap())
3779 return N0;
3780
3781 // 0 - X --> X if X is 0 or the minimum signed value.
3782 return N1;
3783 }
3784
3785 // Convert 0 - abs(x).
3786   if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3787       !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3788 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3789 return Result;
3790
3791   // Fold neg(splat(neg(x))) -> splat(x)
3792 if (VT.isVector()) {
3793 SDValue N1S = DAG.getSplatValue(N1, true);
3794 if (N1S && N1S.getOpcode() == ISD::SUB &&
3795 isNullConstant(N1S.getOperand(0)))
3796 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3797 }
3798 }
3799
3800   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3801   if (isAllOnesOrAllOnesSplat(N0))
3802 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3803
3804 // fold (A - (0-B)) -> A+B
3805 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3806 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3807
3808 // fold A-(A-B) -> B
3809 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3810 return N1.getOperand(1);
3811
3812 // fold (A+B)-A -> B
3813 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3814 return N0.getOperand(1);
3815
3816 // fold (A+B)-B -> A
3817 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3818 return N0.getOperand(0);
3819
3820 // fold (A+C1)-C2 -> A+(C1-C2)
3821 if (N0.getOpcode() == ISD::ADD) {
3822 SDValue N01 = N0.getOperand(1);
3823 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3824 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3825 }
3826
3827 // fold C2-(A+C1) -> (C2-C1)-A
3828 if (N1.getOpcode() == ISD::ADD) {
3829 SDValue N11 = N1.getOperand(1);
3830 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3831 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3832 }
3833
3834 // fold (A-C1)-C2 -> A-(C1+C2)
3835 if (N0.getOpcode() == ISD::SUB) {
3836 SDValue N01 = N0.getOperand(1);
3837 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3838 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3839 }
3840
3841 // fold (c1-A)-c2 -> (c1-c2)-A
3842 if (N0.getOpcode() == ISD::SUB) {
3843 SDValue N00 = N0.getOperand(0);
3844 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3845 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3846 }
3847
3848 SDValue A, B, C;
3849
3850 // fold ((A+(B+C))-B) -> A+C
3851 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3852 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3853
3854 // fold ((A+(B-C))-B) -> A-C
3855 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3856 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3857
3858 // fold ((A-(B-C))-C) -> A-B
3859 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3860 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3861
3862 // fold (A-(B-C)) -> A+(C-B)
3863 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3864 return DAG.getNode(ISD::ADD, DL, VT, N0,
3865 DAG.getNode(ISD::SUB, DL, VT, C, B));
3866
3867 // A - (A & B) -> A & (~B)
3868 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3869 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3870 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3871
3872 // fold (A - (-B * C)) -> (A + (B * C))
3873 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3874 return DAG.getNode(ISD::ADD, DL, VT, N0,
3875 DAG.getNode(ISD::MUL, DL, VT, B, C));
3876
3877 // If either operand of a sub is undef, the result is undef
3878 if (N0.isUndef())
3879 return N0;
3880 if (N1.isUndef())
3881 return N1;
3882
3883 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3884 return V;
3885
3886 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3887 return V;
3888
3889 // Try to match AVGCEIL fixedwidth pattern
3890 if (SDValue V = foldSubToAvg(N, DL))
3891 return V;
3892
3893 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3894 return V;
3895
3896 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3897 return V;
3898
3899   // (A - B) - 1 -> add (xor B, -1), A
3900   if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3901 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3902
3903 // Look for:
3904 // sub y, (xor x, -1)
3905 // And if the target does not like this form then turn it into:
3906 // add (add x, y), 1
3907 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3908 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3909 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3910 }
3911
3912 // Hoist one-use addition by non-opaque constant:
3913 // (x + C) - y -> (x - y) + C
3914 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3915 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3916 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3917 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3918 }
3919 // y - (x + C) -> (y - x) - C
3920 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3921 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3922 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3923 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3924 }
3925 // (x - C) - y -> (x - y) - C
3926 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3927 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3928 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3929 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3930 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3931 }
3932 // (C - x) - y -> C - (x + y)
3933 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3934 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3935 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3936 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3937 }
3938
3939 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3940 // rather than 'sub 0/1' (the sext should get folded).
3941 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3942 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3943 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3944       TLI.getBooleanContents(VT) ==
3945           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3946 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3947 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3948 }
3949
3950 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3951 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3952 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3953 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3954 SDValue S0 = N1.getOperand(0);
3955       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3956         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3957 if (C->getAPIntValue() == (BitWidth - 1))
3958 return DAG.getNode(ISD::ABS, DL, VT, S0);
3959 }
3960 }
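  // For example, for i32 X = -7: Y = sra(X, 31) = -1, (X ^ Y) = 6 and
  // 6 - (-1) = 7 = |X|; for X = 7: Y = 0 and (7 ^ 0) - 0 = 7, so the
  // xor/sra/sub sequence computes abs and maps directly onto the ABS node.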
3961
3962 // If the relocation model supports it, consider symbol offsets.
3963 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3964 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3965 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3966 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3967 if (GA->getGlobal() == GB->getGlobal())
3968 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3969 DL, VT);
3970 }
3971
3972 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3973 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3974 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3975 if (TN->getVT() == MVT::i1) {
3976 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3977 DAG.getConstant(1, DL, VT));
3978 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3979 }
3980 }
3981
3982 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3983 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3984 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3985 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3986 }
3987
3988 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3989 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3990 APInt NewStep = -N1.getConstantOperandAPInt(0);
3991 return DAG.getNode(ISD::ADD, DL, VT, N0,
3992 DAG.getStepVector(DL, VT, NewStep));
3993 }
3994
3995 // Prefer an add for more folding potential and possibly better codegen:
3996 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3997 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3998 SDValue ShAmt = N1.getOperand(1);
3999 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4000 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4001 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4002 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4003 }
4004 }
4005
4006 // As with the previous fold, prefer add for more folding potential.
4007 // Subtracting SMIN/0 is the same as adding SMIN/0:
4008 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4009   if (N1.getOpcode() == ISD::SHL) {
4010     ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4011 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4012 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4013 }
4014
4015 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4016 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4017 N0.getResNo() == 0 && N0.hasOneUse())
4018 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4019 N0.getOperand(0), N1, N0.getOperand(2));
4020
4021   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4022 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4023 if (SDValue Carry = getAsCarry(TLI, N0)) {
4024 SDValue X = N1;
4025 SDValue Zero = DAG.getConstant(0, DL, VT);
4026 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4027 return DAG.getNode(ISD::UADDO_CARRY, DL,
4028 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4029 Carry);
4030 }
4031 }
4032
4033 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4034 // sub C0, X --> xor X, C0
4035 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4036 if (!C0->isOpaque()) {
4037 const APInt &C0Val = C0->getAPIntValue();
4038 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4039 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4040 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4041 }
4042 }
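  // For example, if C0 = 0b1010 and the possibly-set bits of X are a subset
  // of C0 (say X is known to be 0, 2, 8 or 10), each subtraction only clears
  // bits of C0, so C0 - X == C0 ^ X and no borrow can occur.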
4043
4044 // smax(a,b) - smin(a,b) --> abds(a,b)
4045 if (hasOperation(ISD::ABDS, VT) &&
4046       sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4047       sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4048 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4049
4050 // umax(a,b) - umin(a,b) --> abdu(a,b)
4051 if (hasOperation(ISD::ABDU, VT) &&
4052       sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4053       sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4054 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4055
4056 return SDValue();
4057}
4058
4059SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4060 unsigned Opcode = N->getOpcode();
4061 SDValue N0 = N->getOperand(0);
4062 SDValue N1 = N->getOperand(1);
4063 EVT VT = N0.getValueType();
4064 bool IsSigned = Opcode == ISD::SSUBSAT;
4065 SDLoc DL(N);
4066
4067 // fold (sub_sat x, undef) -> 0
4068 if (N0.isUndef() || N1.isUndef())
4069 return DAG.getConstant(0, DL, VT);
4070
4071 // fold (sub_sat x, x) -> 0
4072 if (N0 == N1)
4073 return DAG.getConstant(0, DL, VT);
4074
4075 // fold (sub_sat c1, c2) -> c3
4076 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4077 return C;
4078
4079 // fold vector ops
4080 if (VT.isVector()) {
4081 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4082 return FoldedVOp;
4083
4084     // fold (sub_sat x, 0) -> x, vector edition
4085     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4086 return N0;
4087 }
4088
4089 // fold (sub_sat x, 0) -> x
4090 if (isNullConstant(N1))
4091 return N0;
4092
4093   // If it cannot overflow, transform into a sub.
4094 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4095 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4096
4097 return SDValue();
4098}
4099
4100SDValue DAGCombiner::visitSUBC(SDNode *N) {
4101 SDValue N0 = N->getOperand(0);
4102 SDValue N1 = N->getOperand(1);
4103 EVT VT = N0.getValueType();
4104 SDLoc DL(N);
4105
4106 // If the flag result is dead, turn this into an SUB.
4107 if (!N->hasAnyUseOfValue(1))
4108 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4109 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4110
4111 // fold (subc x, x) -> 0 + no borrow
4112 if (N0 == N1)
4113 return CombineTo(N, DAG.getConstant(0, DL, VT),
4114 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4115
4116 // fold (subc x, 0) -> x + no borrow
4117 if (isNullConstant(N1))
4118 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4119
4120 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4121 if (isAllOnesConstant(N0))
4122 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4123 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4124
4125 return SDValue();
4126}
4127
4128SDValue DAGCombiner::visitSUBO(SDNode *N) {
4129 SDValue N0 = N->getOperand(0);
4130 SDValue N1 = N->getOperand(1);
4131 EVT VT = N0.getValueType();
4132 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4133
4134 EVT CarryVT = N->getValueType(1);
4135 SDLoc DL(N);
4136
4137 // If the flag result is dead, turn this into an SUB.
4138 if (!N->hasAnyUseOfValue(1))
4139 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4140 DAG.getUNDEF(CarryVT));
4141
4142 // fold (subo x, x) -> 0 + no borrow
4143 if (N0 == N1)
4144 return CombineTo(N, DAG.getConstant(0, DL, VT),
4145 DAG.getConstant(0, DL, CarryVT));
4146
4147   // fold (subo x, c) -> (addo x, -c)
4148   if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4149 if (IsSigned && !N1C->isMinSignedValue())
4150 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4151 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4152
4153 // fold (subo x, 0) -> x + no borrow
4154 if (isNullOrNullSplat(N1))
4155 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4156
4157   // If it cannot overflow, transform into a sub.
4158 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4159 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4160 DAG.getConstant(0, DL, CarryVT));
4161
4162 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4163 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4164 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4165 DAG.getConstant(0, DL, CarryVT));
4166
4167 return SDValue();
4168}
4169
4170SDValue DAGCombiner::visitSUBE(SDNode *N) {
4171 SDValue N0 = N->getOperand(0);
4172 SDValue N1 = N->getOperand(1);
4173 SDValue CarryIn = N->getOperand(2);
4174
4175 // fold (sube x, y, false) -> (subc x, y)
4176 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4177 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4178
4179 return SDValue();
4180}
4181
4182SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4183 SDValue N0 = N->getOperand(0);
4184 SDValue N1 = N->getOperand(1);
4185 SDValue CarryIn = N->getOperand(2);
4186
4187 // fold (usubo_carry x, y, false) -> (usubo x, y)
4188 if (isNullConstant(CarryIn)) {
4189 if (!LegalOperations ||
4190 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4191 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4192 }
4193
4194 return SDValue();
4195}
4196
4197SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4198 SDValue N0 = N->getOperand(0);
4199 SDValue N1 = N->getOperand(1);
4200 SDValue CarryIn = N->getOperand(2);
4201
4202 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4203 if (isNullConstant(CarryIn)) {
4204 if (!LegalOperations ||
4205 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4206 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4207 }
4208
4209 return SDValue();
4210}
4211
4212// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4213// UMULFIXSAT here.
4214SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4215 SDValue N0 = N->getOperand(0);
4216 SDValue N1 = N->getOperand(1);
4217 SDValue Scale = N->getOperand(2);
4218 EVT VT = N0.getValueType();
4219
4220 // fold (mulfix x, undef, scale) -> 0
4221 if (N0.isUndef() || N1.isUndef())
4222 return DAG.getConstant(0, SDLoc(N), VT);
4223
4224   // Canonicalize constant to RHS (vector doesn't have to splat)
4225   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4226       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4227 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4228
4229 // fold (mulfix x, 0, scale) -> 0
4230 if (isNullConstant(N1))
4231 return DAG.getConstant(0, SDLoc(N), VT);
4232
4233 return SDValue();
4234}
4235
4236SDValue DAGCombiner::visitMUL(SDNode *N) {
4237 SDValue N0 = N->getOperand(0);
4238 SDValue N1 = N->getOperand(1);
4239 EVT VT = N0.getValueType();
4240 SDLoc DL(N);
4241
4242 // fold (mul x, undef) -> 0
4243 if (N0.isUndef() || N1.isUndef())
4244 return DAG.getConstant(0, DL, VT);
4245
4246 // fold (mul c1, c2) -> c1*c2
4247 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4248 return C;
4249
4250   // canonicalize constant to RHS (vector doesn't have to splat)
4251   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4252       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4253 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4254
4255 bool N1IsConst = false;
4256 bool N1IsOpaqueConst = false;
4257 APInt ConstValue1;
4258
4259 // fold vector ops
4260 if (VT.isVector()) {
4261 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4262 return FoldedVOp;
4263
4264 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4265 assert((!N1IsConst ||
4266 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4267 "Splat APInt should be element width");
4268 } else {
4269 N1IsConst = isa<ConstantSDNode>(N1);
4270 if (N1IsConst) {
4271 ConstValue1 = N1->getAsAPIntVal();
4272 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4273 }
4274 }
4275
4276 // fold (mul x, 0) -> 0
4277 if (N1IsConst && ConstValue1.isZero())
4278 return N1;
4279
4280 // fold (mul x, 1) -> x
4281 if (N1IsConst && ConstValue1.isOne())
4282 return N0;
4283
4284 if (SDValue NewSel = foldBinOpIntoSelect(N))
4285 return NewSel;
4286
4287 // fold (mul x, -1) -> 0-x
4288 if (N1IsConst && ConstValue1.isAllOnes())
4289 return DAG.getNegative(N0, DL, VT);
4290
4291 // fold (mul x, (1 << c)) -> x << c
4292 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4293 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4294 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4295 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4296 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4297 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4298 }
4299 }
4300
4301 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4302 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4303 unsigned Log2Val = (-ConstValue1).logBase2();
4304 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4305
4306 // FIXME: If the input is something that is easily negated (e.g. a
4307 // single-use add), we should put the negate there.
4308 return DAG.getNode(ISD::SUB, DL, VT,
4309 DAG.getConstant(0, DL, VT),
4310 DAG.getNode(ISD::SHL, DL, VT, N0,
4311 DAG.getConstant(Log2Val, DL, ShiftVT)));
4312 }
4313
4314 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4315 // hi result is in use in case we hit this mid-legalization.
4316 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4317 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4318 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4319 // TODO: Can we match commutable operands with getNodeIfExists?
4320 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4321 if (LoHi->hasAnyUseOfValue(1))
4322 return SDValue(LoHi, 0);
4323 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4324 if (LoHi->hasAnyUseOfValue(1))
4325 return SDValue(LoHi, 0);
4326 }
4327 }
4328
4329 // Try to transform:
4330 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4331 // mul x, (2^N + 1) --> add (shl x, N), x
4332 // mul x, (2^N - 1) --> sub (shl x, N), x
4333 // Examples: x * 33 --> (x << 5) + x
4334 // x * 15 --> (x << 4) - x
4335 // x * -33 --> -((x << 5) + x)
4336 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4337 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4338 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4339 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4340 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4341 // x * 0xf800 --> (x << 16) - (x << 11)
4342 // x * -0x8800 --> -((x << 15) + (x << 11))
4343 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4344 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4345 // TODO: We could handle more general decomposition of any constant by
4346 // having the target set a limit on number of ops and making a
4347 // callback to determine that sequence (similar to sqrt expansion).
4348 unsigned MathOp = ISD::DELETED_NODE;
4349 APInt MulC = ConstValue1.abs();
4350 // The constant `2` should be treated as (2^0 + 1).
4351 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4352 MulC.lshrInPlace(TZeros);
4353 if ((MulC - 1).isPowerOf2())
4354 MathOp = ISD::ADD;
4355 else if ((MulC + 1).isPowerOf2())
4356 MathOp = ISD::SUB;
4357
4358 if (MathOp != ISD::DELETED_NODE) {
4359 unsigned ShAmt =
4360 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4361 ShAmt += TZeros;
4362 assert(ShAmt < VT.getScalarSizeInBits() &&
4363 "multiply-by-constant generated out of bounds shift");
4364 SDValue Shl =
4365 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4366 SDValue R =
4367 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4368 DAG.getNode(ISD::SHL, DL, VT, N0,
4369 DAG.getConstant(TZeros, DL, VT)))
4370 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4371 if (ConstValue1.isNegative())
4372 R = DAG.getNegative(R, DL, VT);
4373 return R;
4374 }
4375 }
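  // Worked example of the decomposition above: for x * 10, MulC = 10 has one
  // trailing zero, so TZeros = 1 and MulC becomes 5; (5 - 1) is a power of
  // two, giving MathOp = ISD::ADD and ShAmt = 2 + 1 = 3, so the result is
  // (x << 3) + (x << 1) = 8x + 2x = 10x.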
4376
4377 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4378 if (N0.getOpcode() == ISD::SHL) {
4379 SDValue N01 = N0.getOperand(1);
4380 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4381 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4382 }
4383
4384 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4385 // use.
4386 {
4387 SDValue Sh, Y;
4388
4389 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4390     if (N0.getOpcode() == ISD::SHL &&
4391         isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4392 Sh = N0; Y = N1;
4393     } else if (N1.getOpcode() == ISD::SHL &&
4394                isConstantOrConstantVector(N1.getOperand(1)) &&
4395 N1->hasOneUse()) {
4396 Sh = N1; Y = N0;
4397 }
4398
4399 if (Sh.getNode()) {
4400 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4401 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4402 }
4403 }
4404
4405 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4406   if (N0.getOpcode() == ISD::ADD &&
4407       DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4408       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4409       isMulAddWithConstProfitable(N, N0, N1))
4410 return DAG.getNode(
4411 ISD::ADD, DL, VT,
4412 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4413 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4414
4415   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4416   ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4417 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4418 const APInt &C0 = N0.getConstantOperandAPInt(0);
4419 const APInt &C1 = NC1->getAPIntValue();
4420 return DAG.getVScale(DL, VT, C0 * C1);
4421 }
4422
4423 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4424 APInt MulVal;
4425 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4426 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4427 const APInt &C0 = N0.getConstantOperandAPInt(0);
4428 APInt NewStep = C0 * MulVal;
4429 return DAG.getStepVector(DL, VT, NewStep);
4430 }
4431
4432   // Fold (mul x, 0/undef) -> 0 and
4433   //      (mul x, 1) -> x
4434 // -> and(x, mask)
4435 // We can replace vectors with '0' and '1' factors with a clearing mask.
4436 if (VT.isFixedLengthVector()) {
4437 unsigned NumElts = VT.getVectorNumElements();
4438 SmallBitVector ClearMask;
4439 ClearMask.reserve(NumElts);
4440 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4441 if (!V || V->isZero()) {
4442 ClearMask.push_back(true);
4443 return true;
4444 }
4445 ClearMask.push_back(false);
4446 return V->isOne();
4447 };
4448 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4449 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4450 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4451 EVT LegalSVT = N1.getOperand(0).getValueType();
4452 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4453       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4454       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4455 for (unsigned I = 0; I != NumElts; ++I)
4456 if (ClearMask[I])
4457 Mask[I] = Zero;
4458 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4459 }
4460 }
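  // For example, mul <4 x i32> X, <0, 1, 0, 1> becomes
  // and X, <0, -1, 0, -1>: lanes multiplied by 0 are cleared and lanes
  // multiplied by 1 pass through unchanged.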
4461
4462 // reassociate mul
4463 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4464 return RMUL;
4465
4466 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4467 if (SDValue SD =
4468 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4469 return SD;
4470
4471   // Simplify the operands using demanded-bits information.
4472   if (SimplifyDemandedBits(SDValue(N, 0)))
4473 return SDValue(N, 0);
4474
4475 return SDValue();
4476}
4477
4478 /// Return true if divmod libcall is available.
4479 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4480 const TargetLowering &TLI) {
4481 RTLIB::Libcall LC;
4482 EVT NodeType = Node->getValueType(0);
4483 if (!NodeType.isSimple())
4484 return false;
4485 switch (NodeType.getSimpleVT().SimpleTy) {
4486 default: return false; // No libcall for vector types.
4487 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4488 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4489 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4490 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4491 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4492 }
4493
4494 return TLI.getLibcallName(LC) != nullptr;
4495}
4496
4497/// Issue divrem if both quotient and remainder are needed.
4498SDValue DAGCombiner::useDivRem(SDNode *Node) {
4499 if (Node->use_empty())
4500 return SDValue(); // This is a dead node, leave it alone.
4501
4502 unsigned Opcode = Node->getOpcode();
4503 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4504 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4505
4506   // A DIVREM on a non-legal type can still work if it is lowered to a libcall.
4507 EVT VT = Node->getValueType(0);
4508 if (VT.isVector() || !VT.isInteger())
4509 return SDValue();
4510
4511 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4512 return SDValue();
4513
4514 // If DIVREM is going to get expanded into a libcall,
4515 // but there is no libcall available, then don't combine.
4516   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4517       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4518 return SDValue();
4519
4520 // If div is legal, it's better to do the normal expansion
4521 unsigned OtherOpcode = 0;
4522 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4523 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4524 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4525 return SDValue();
4526 } else {
4527 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4528 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4529 return SDValue();
4530 }
4531
4532 SDValue Op0 = Node->getOperand(0);
4533 SDValue Op1 = Node->getOperand(1);
4534 SDValue combined;
4535 for (SDNode *User : Op0->uses()) {
4536 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4537 User->use_empty())
4538 continue;
4539 // Convert the other matching node(s), too;
4540 // otherwise, the DIVREM may get target-legalized into something
4541 // target-specific that we won't be able to recognize.
4542 unsigned UserOpc = User->getOpcode();
4543 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4544 User->getOperand(0) == Op0 &&
4545 User->getOperand(1) == Op1) {
4546 if (!combined) {
4547 if (UserOpc == OtherOpcode) {
4548 SDVTList VTs = DAG.getVTList(VT, VT);
4549 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4550 } else if (UserOpc == DivRemOpc) {
4551 combined = SDValue(User, 0);
4552 } else {
4553 assert(UserOpc == Opcode);
4554 continue;
4555 }
4556 }
4557 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4558 CombineTo(User, combined);
4559 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4560 CombineTo(User, combined.getValue(1));
4561 }
4562 }
4563 return combined;
4564}
4565
4566 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4567 SDValue N0 = N->getOperand(0);
4568 SDValue N1 = N->getOperand(1);
4569 EVT VT = N->getValueType(0);
4570 SDLoc DL(N);
4571
4572 unsigned Opc = N->getOpcode();
4573   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4574   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4575
4576 // X / undef -> undef
4577 // X % undef -> undef
4578 // X / 0 -> undef
4579 // X % 0 -> undef
4580 // NOTE: This includes vectors where any divisor element is zero/undef.
4581 if (DAG.isUndef(Opc, {N0, N1}))
4582 return DAG.getUNDEF(VT);
4583
4584 // undef / X -> 0
4585 // undef % X -> 0
4586 if (N0.isUndef())
4587 return DAG.getConstant(0, DL, VT);
4588
4589 // 0 / X -> 0
4590   // 0 % X -> 0
4591   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4592 if (N0C && N0C->isZero())
4593 return N0;
4594
4595 // X / X -> 1
4596 // X % X -> 0
4597 if (N0 == N1)
4598 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4599
4600 // X / 1 -> X
4601 // X % 1 -> 0
4602 // If this is a boolean op (single-bit element type), we can't have
4603 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4604 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4605 // it's a 1.
4606 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4607 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4608
4609 return SDValue();
4610}
4611
4612SDValue DAGCombiner::visitSDIV(SDNode *N) {
4613 SDValue N0 = N->getOperand(0);
4614 SDValue N1 = N->getOperand(1);
4615 EVT VT = N->getValueType(0);
4616 EVT CCVT = getSetCCResultType(VT);
4617 SDLoc DL(N);
4618
4619 // fold (sdiv c1, c2) -> c1/c2
4620 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4621 return C;
4622
4623 // fold vector ops
4624 if (VT.isVector())
4625 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4626 return FoldedVOp;
4627
4628   // fold (sdiv X, -1) -> 0-X
4629   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4630 if (N1C && N1C->isAllOnes())
4631 return DAG.getNegative(N0, DL, VT);
4632
4633 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4634 if (N1C && N1C->isMinSignedValue())
4635 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4636 DAG.getConstant(1, DL, VT),
4637 DAG.getConstant(0, DL, VT));
4638
4639 if (SDValue V = simplifyDivRem(N, DAG))
4640 return V;
4641
4642 if (SDValue NewSel = foldBinOpIntoSelect(N))
4643 return NewSel;
4644
4645 // If we know the sign bits of both operands are zero, strength reduce to a
4646 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4647 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4648 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4649
4650 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4651 // If the corresponding remainder node exists, update its users with
4652     // (Dividend - (Quotient * Divisor)).
4653 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4654 { N0, N1 })) {
4655 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4656 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4657 AddToWorklist(Mul.getNode());
4658 AddToWorklist(Sub.getNode());
4659 CombineTo(RemNode, Sub);
4660 }
4661 return V;
4662 }
4663
4664 // sdiv, srem -> sdivrem
4665 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4666   // true. Otherwise, we break the simplification logic in visitREM().
4667   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4668 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4669 if (SDValue DivRem = useDivRem(N))
4670 return DivRem;
4671
4672 return SDValue();
4673}
4674
4675static bool isDivisorPowerOfTwo(SDValue Divisor) {
4676 // Helper for determining whether a value is a power-2 constant scalar or a
4677 // vector of such elements.
4678 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4679 if (C->isZero() || C->isOpaque())
4680 return false;
4681 if (C->getAPIntValue().isPowerOf2())
4682 return true;
4683 if (C->getAPIntValue().isNegatedPowerOf2())
4684 return true;
4685 return false;
4686 };
4687
4688 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4689}
4690
4691SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4692 SDLoc DL(N);
4693 EVT VT = N->getValueType(0);
4694 EVT CCVT = getSetCCResultType(VT);
4695 unsigned BitWidth = VT.getScalarSizeInBits();
4696
4697 // fold (sdiv X, pow2) -> simple ops after legalize
4698 // FIXME: We check for the exact bit here because the generic lowering gives
4699 // better results in that case. The target-specific lowering should learn how
4700 // to handle exact sdivs efficiently.
4701 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4702 // Target-specific implementation of sdiv x, pow2.
4703 if (SDValue Res = BuildSDIVPow2(N))
4704 return Res;
4705
4706 // Create constants that are functions of the shift amount value.
4707 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4708 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4709 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4710 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4711 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4712 if (!isConstantOrConstantVector(Inexact))
4713 return SDValue();
4714
4715 // Splat the sign bit into the register
4716 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4717 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4718 AddToWorklist(Sign.getNode());
4719
4720 // Add (N0 < 0) ? abs2 - 1 : 0;
4721 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4722 AddToWorklist(Srl.getNode());
4723 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4724 AddToWorklist(Add.getNode());
4725 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4726 AddToWorklist(Sra.getNode());
4727
4728 // Special case: (sdiv X, 1) -> X
4729 // Special Case: (sdiv X, -1) -> 0-X
4730     SDValue One = DAG.getConstant(1, DL, VT);
4731     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4732 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4733 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4734 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4735 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4736
4737 // If dividing by a positive value, we're done. Otherwise, the result must
4738 // be negated.
4739 SDValue Zero = DAG.getConstant(0, DL, VT);
4740 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4741
4742 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4743 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4744 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4745 return Res;
4746 }
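  // Worked example of the expansion above for i8 (sdiv X, 4): C1 = cttz(4)
  // = 2 and Inexact = 8 - 2 = 6. For X = -7: Sign = sra(-7, 7) = -1,
  // Srl = srl(0xFF, 6) = 3, Add = -7 + 3 = -4, and sra(-4, 2) = -1, matching
  // the round-toward-zero result -7 / 4 = -1; for X = 7 the bias is 0 and
  // sra(7, 2) = 1.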
4747
4748 // If integer divide is expensive and we satisfy the requirements, emit an
4749 // alternate sequence. Targets may check function attributes for size/speed
4750   // trade-offs.
4751   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4752   if (isConstantOrConstantVector(N1) &&
4753 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4754 if (SDValue Op = BuildSDIV(N))
4755 return Op;
4756
4757 return SDValue();
4758}
4759
4760SDValue DAGCombiner::visitUDIV(SDNode *N) {
4761 SDValue N0 = N->getOperand(0);
4762 SDValue N1 = N->getOperand(1);
4763 EVT VT = N->getValueType(0);
4764 EVT CCVT = getSetCCResultType(VT);
4765 SDLoc DL(N);
4766
4767 // fold (udiv c1, c2) -> c1/c2
4768 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4769 return C;
4770
4771 // fold vector ops
4772 if (VT.isVector())
4773 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4774 return FoldedVOp;
4775
4776   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4777   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4777 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4778 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4779 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4780 DAG.getConstant(1, DL, VT),
4781 DAG.getConstant(0, DL, VT));
4782 }
4783
4784 if (SDValue V = simplifyDivRem(N, DAG))
4785 return V;
4786
4787 if (SDValue NewSel = foldBinOpIntoSelect(N))
4788 return NewSel;
4789
4790 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4791 // If the corresponding remainder node exists, update its users with
4792 // (Dividend - (Quotient * Divisor)).
4793 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4794 { N0, N1 })) {
4795 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4796 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4797 AddToWorklist(Mul.getNode());
4798 AddToWorklist(Sub.getNode());
4799 CombineTo(RemNode, Sub);
4800 }
4801 return V;
4802 }
4803
4804 // udiv, urem -> udivrem
4805 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4806 // true. Otherwise, we break the simplification logic in visitREM().
4807 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4808 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4809 if (SDValue DivRem = useDivRem(N))
4810 return DivRem;
4811
4812 return SDValue();
4813}
4814
4815SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4816 SDLoc DL(N);
4817 EVT VT = N->getValueType(0);
4818
4819 // fold (udiv x, (1 << c)) -> x >>u c
4820 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4821 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4822 AddToWorklist(LogBase2.getNode());
4823
4824 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4825 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4826 AddToWorklist(Trunc.getNode());
4827 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4828 }
4829 }
4830
4831 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
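// For instance, (udiv x, (shl 4, y)) becomes x >>u (y + 2), since log2(4) == 2.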
4832 if (N1.getOpcode() == ISD::SHL) {
4833 SDValue N10 = N1.getOperand(0);
4834 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4835 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4836 AddToWorklist(LogBase2.getNode());
4837
4838 EVT ADDVT = N1.getOperand(1).getValueType();
4839 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4840 AddToWorklist(Trunc.getNode());
4841 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4842 AddToWorklist(Add.getNode());
4843 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4844 }
4845 }
4846 }
4847
4848 // fold (udiv x, c) -> alternate
4849 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4850 if (isConstantOrConstantVector(N1) &&
4851 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4852 if (SDValue Op = BuildUDIV(N))
4853 return Op;
4854
4855 return SDValue();
4856}
4857
4858SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4859 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4860 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4861 // Target-specific implementation of srem x, pow2.
4862 if (SDValue Res = BuildSREMPow2(N))
4863 return Res;
4864 }
4865 return SDValue();
4866}
4867
4868// handles ISD::SREM and ISD::UREM
4869SDValue DAGCombiner::visitREM(SDNode *N) {
4870 unsigned Opcode = N->getOpcode();
4871 SDValue N0 = N->getOperand(0);
4872 SDValue N1 = N->getOperand(1);
4873 EVT VT = N->getValueType(0);
4874 EVT CCVT = getSetCCResultType(VT);
4875
4876 bool isSigned = (Opcode == ISD::SREM);
4877 SDLoc DL(N);
4878
4879 // fold (rem c1, c2) -> c1%c2
4880 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4881 return C;
4882
4883 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4884 // Freeze the numerator to avoid a miscompile with an undefined value.
4885 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4886 CCVT.isVector() == VT.isVector()) {
4887 SDValue F0 = DAG.getFreeze(N0);
4888 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4889 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4890 }
4891
4892 if (SDValue V = simplifyDivRem(N, DAG))
4893 return V;
4894
4895 if (SDValue NewSel = foldBinOpIntoSelect(N))
4896 return NewSel;
4897
4898 if (isSigned) {
4899 // If we know the sign bits of both operands are zero, strength reduce to a
4900 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4901 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4902 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4903 } else {
4904 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4905 // fold (urem x, pow2) -> (and x, pow2-1)
4906 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4907 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4908 AddToWorklist(Add.getNode());
4909 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4910 }
4911 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4912 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4913 // TODO: We should sink the following into isKnownToBePowerOfTwo
4914 // using a OrZero parameter analogous to our handling in ValueTracking.
4915 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4916 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4917 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4918 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4919 AddToWorklist(Add.getNode());
4920 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4921 }
4922 }
4923
4923
4924 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4925
4926 // If X/C can be simplified by the division-by-constant logic, lower
4927 // X%C to the equivalent of X-X/C*C.
4928 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4929 // speculative DIV must not cause a DIVREM conversion. We guard against this
4930 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4931 // combine will not return a DIVREM. Regardless, checking cheapness here
4932 // makes sense since the simplification results in fatter code.
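// As an illustration, srem X, 7 becomes X - (sdiv X, 7) * 7, where the sdiv is
// itself expanded by the division-by-constant logic above.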
4933 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4934 if (isSigned) {
4935 // check if we can build faster implementation for srem
4936 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4937 return OptimizedRem;
4938 }
4939
4940 SDValue OptimizedDiv =
4941 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4942 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4943 // If the equivalent Div node also exists, update its users.
4944 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4945 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4946 { N0, N1 }))
4947 CombineTo(DivNode, OptimizedDiv);
4948 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4949 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4950 AddToWorklist(OptimizedDiv.getNode());
4951 AddToWorklist(Mul.getNode());
4952 return Sub;
4953 }
4954 }
4955
4956 // sdiv/udiv, srem/urem -> sdivrem/udivrem
4957 if (SDValue DivRem = useDivRem(N))
4958 return DivRem.getValue(1);
4959
4960 return SDValue();
4961}
4962
4963SDValue DAGCombiner::visitMULHS(SDNode *N) {
4964 SDValue N0 = N->getOperand(0);
4965 SDValue N1 = N->getOperand(1);
4966 EVT VT = N->getValueType(0);
4967 SDLoc DL(N);
4968
4969 // fold (mulhs c1, c2)
4970 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4971 return C;
4972
4973 // canonicalize constant to RHS.
4974 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4975 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4976 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4977
4978 if (VT.isVector()) {
4979 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4980 return FoldedVOp;
4981
4982 // fold (mulhs x, 0) -> 0
4983 // do not return N1, because undef node may exist.
4984 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4985 return DAG.getConstant(0, DL, VT);
4986 }
4987
4988 // fold (mulhs x, 0) -> 0
4989 if (isNullConstant(N1))
4990 return N1;
4991
4992 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4993 if (isOneConstant(N1))
4994 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4995 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4996 getShiftAmountTy(N0.getValueType())));
4997
4998 // fold (mulhs x, undef) -> 0
4999 if (N0.isUndef() || N1.isUndef())
5000 return DAG.getConstant(0, DL, VT);
5001
5002 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5003 // plus a shift.
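// Illustrative example: for an i16 MULHS when i32 MUL is legal,
// mulhs a, b ==> trunc i16 ((sext i32 a * sext i32 b) >> 16).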
5004 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5005 !VT.isVector()) {
5006 MVT Simple = VT.getSimpleVT();
5007 unsigned SimpleSize = Simple.getSizeInBits();
5008 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5009 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5010 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5011 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5012 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5013 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5014 DAG.getConstant(SimpleSize, DL,
5015 getShiftAmountTy(N1.getValueType())));
5016 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5017 }
5018 }
5019
5020 return SDValue();
5021}
5022
5023SDValue DAGCombiner::visitMULHU(SDNode *N) {
5024 SDValue N0 = N->getOperand(0);
5025 SDValue N1 = N->getOperand(1);
5026 EVT VT = N->getValueType(0);
5027 SDLoc DL(N);
5028
5029 // fold (mulhu c1, c2)
5030 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5031 return C;
5032
5033 // canonicalize constant to RHS.
5034 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5035 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5036 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5037
5038 if (VT.isVector()) {
5039 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5040 return FoldedVOp;
5041
5042 // fold (mulhu x, 0) -> 0
5043 // do not return N1, because undef node may exist.
5044 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5045 return DAG.getConstant(0, DL, VT);
5046 }
5047
5048 // fold (mulhu x, 0) -> 0
5049 if (isNullConstant(N1))
5050 return N1;
5051
5052 // fold (mulhu x, 1) -> 0
5053 if (isOneConstant(N1))
5054 return DAG.getConstant(0, DL, N0.getValueType());
5055
5056 // fold (mulhu x, undef) -> 0
5057 if (N0.isUndef() || N1.isUndef())
5058 return DAG.getConstant(0, DL, VT);
5059
5060 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
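// E.g. for i32, mulhu x, 16 is x >>u 28: the high half of x * 2^4 is x shifted
// right by 32 - 4 bits.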
5061 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5062 hasOperation(ISD::SRL, VT)) {
5063 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5064 unsigned NumEltBits = VT.getScalarSizeInBits();
5065 SDValue SRLAmt = DAG.getNode(
5066 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5067 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5068 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5069 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5070 }
5071 }
5072
5073 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5074 // plus a shift.
5075 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5076 !VT.isVector()) {
5077 MVT Simple = VT.getSimpleVT();
5078 unsigned SimpleSize = Simple.getSizeInBits();
5079 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5080 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5081 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5082 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5083 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5084 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5085 DAG.getConstant(SimpleSize, DL,
5086 getShiftAmountTy(N1.getValueType())));
5087 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5088 }
5089 }
5090
5091 // Simplify the operands using demanded-bits information.
5092 // We don't have demanded bits support for MULHU so this just enables constant
5093 // folding based on known bits.
5094 if (SimplifyDemandedBits(SDValue(N, 0)))
5095 return SDValue(N, 0);
5096
5097 return SDValue();
5098}
5099
5100SDValue DAGCombiner::visitAVG(SDNode *N) {
5101 unsigned Opcode = N->getOpcode();
5102 SDValue N0 = N->getOperand(0);
5103 SDValue N1 = N->getOperand(1);
5104 EVT VT = N->getValueType(0);
5105 SDLoc DL(N);
5106
5107 // fold (avg c1, c2)
5108 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5109 return C;
5110
5111 // canonicalize constant to RHS.
5112 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5113 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5114 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5115
5116 if (VT.isVector()) {
5117 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5118 return FoldedVOp;
5119
5120 // fold (avgfloor x, 0) -> x >> 1
5121 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5122 if (Opcode == ISD::AVGFLOORS)
5123 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5124 if (Opcode == ISD::AVGFLOORU)
5125 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5126 }
5127 }
5128
5129 // fold (avg x, undef) -> x
5130 if (N0.isUndef())
5131 return N1;
5132 if (N1.isUndef())
5133 return N0;
5134
5135 // Fold (avg x, x) --> x
5136 if (N0 == N1 && Level >= AfterLegalizeTypes)
5137 return N0;
5138
5139 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5140
5141 return SDValue();
5142}
5143
5144SDValue DAGCombiner::visitABD(SDNode *N) {
5145 unsigned Opcode = N->getOpcode();
5146 SDValue N0 = N->getOperand(0);
5147 SDValue N1 = N->getOperand(1);
5148 EVT VT = N->getValueType(0);
5149 SDLoc DL(N);
5150
5151 // fold (abd c1, c2)
5152 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5153 return C;
5154
5155 // canonicalize constant to RHS.
5156 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5157 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5158 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5159
5160 if (VT.isVector()) {
5161 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5162 return FoldedVOp;
5163
5164 // fold (abds x, 0) -> abs x
5165 // fold (abdu x, 0) -> x
5166 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5167 if (Opcode == ISD::ABDS)
5168 return DAG.getNode(ISD::ABS, DL, VT, N0);
5169 if (Opcode == ISD::ABDU)
5170 return N0;
5171 }
5172 }
5173
5174 // fold (abd x, undef) -> 0
5175 if (N0.isUndef() || N1.isUndef())
5176 return DAG.getConstant(0, DL, VT);
5177
5178 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5179 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5180 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5181 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5182
5183 return SDValue();
5184}
5185
5186/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5187 /// give the opcodes for the two computations that are being performed. Return
5188 /// the combined result if a simplification was made.
5189SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5190 unsigned HiOp) {
5191 // If the high half is not needed, just compute the low half.
5192 bool HiExists = N->hasAnyUseOfValue(1);
5193 if (!HiExists && (!LegalOperations ||
5194 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5195 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5196 return CombineTo(N, Res, Res);
5197 }
5198
5199 // If the low half is not needed, just compute the high half.
5200 bool LoExists = N->hasAnyUseOfValue(0);
5201 if (!LoExists && (!LegalOperations ||
5202 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5203 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5204 return CombineTo(N, Res, Res);
5205 }
5206
5207 // If both halves are used, return as it is.
5208 if (LoExists && HiExists)
5209 return SDValue();
5210
5211 // If the two computed results can be simplified separately, separate them.
5212 if (LoExists) {
5213 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5214 AddToWorklist(Lo.getNode());
5215 SDValue LoOpt = combine(Lo.getNode());
5216 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5217 (!LegalOperations ||
5218 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5219 return CombineTo(N, LoOpt, LoOpt);
5220 }
5221
5222 if (HiExists) {
5223 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5224 AddToWorklist(Hi.getNode());
5225 SDValue HiOpt = combine(Hi.getNode());
5226 if (HiOpt.getNode() && HiOpt != Hi &&
5227 (!LegalOperations ||
5228 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5229 return CombineTo(N, HiOpt, HiOpt);
5230 }
5231
5232 return SDValue();
5233}
5234
5235SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5236 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5237 return Res;
5238
5239 SDValue N0 = N->getOperand(0);
5240 SDValue N1 = N->getOperand(1);
5241 EVT VT = N->getValueType(0);
5242 SDLoc DL(N);
5243
5244 // Constant fold.
5245 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5246 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5247
5248 // canonicalize constant to RHS (vector doesn't have to splat)
5249 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5250 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5251 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5252
5253 // If the type twice as wide is legal, transform the multiply to a wider
5254 // multiply plus a shift.
5255 if (VT.isSimple() && !VT.isVector()) {
5256 MVT Simple = VT.getSimpleVT();
5257 unsigned SimpleSize = Simple.getSizeInBits();
5258 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5259 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5260 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5261 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5262 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5263 // Compute the high part as N1.
5264 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5265 DAG.getConstant(SimpleSize, DL,
5266 getShiftAmountTy(Lo.getValueType())));
5267 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5268 // Compute the low part as N0.
5269 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5270 return CombineTo(N, Lo, Hi);
5271 }
5272 }
5273
5274 return SDValue();
5275}
5276
5277SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5278 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5279 return Res;
5280
5281 SDValue N0 = N->getOperand(0);
5282 SDValue N1 = N->getOperand(1);
5283 EVT VT = N->getValueType(0);
5284 SDLoc DL(N);
5285
5286 // Constant fold.
5287 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5288 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5289
5290 // canonicalize constant to RHS (vector doesn't have to splat)
5291 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5292 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5293 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5294
5295 // (umul_lohi N0, 0) -> (0, 0)
5296 if (isNullConstant(N1)) {
5297 SDValue Zero = DAG.getConstant(0, DL, VT);
5298 return CombineTo(N, Zero, Zero);
5299 }
5300
5301 // (umul_lohi N0, 1) -> (N0, 0)
5302 if (isOneConstant(N1)) {
5303 SDValue Zero = DAG.getConstant(0, DL, VT);
5304 return CombineTo(N, N0, Zero);
5305 }
5306
5307 // If the type twice as wide is legal, transform the multiply to a wider
5308 // multiply plus a shift.
5309 if (VT.isSimple() && !VT.isVector()) {
5310 MVT Simple = VT.getSimpleVT();
5311 unsigned SimpleSize = Simple.getSizeInBits();
5312 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5313 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5314 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5315 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5316 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5317 // Compute the high part as N1.
5318 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5319 DAG.getConstant(SimpleSize, DL,
5320 getShiftAmountTy(Lo.getValueType())));
5321 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5322 // Compute the low part as N0.
5323 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5324 return CombineTo(N, Lo, Hi);
5325 }
5326 }
5327
5328 return SDValue();
5329}
5330
5331SDValue DAGCombiner::visitMULO(SDNode *N) {
5332 SDValue N0 = N->getOperand(0);
5333 SDValue N1 = N->getOperand(1);
5334 EVT VT = N0.getValueType();
5335 bool IsSigned = (ISD::SMULO == N->getOpcode());
5336
5337 EVT CarryVT = N->getValueType(1);
5338 SDLoc DL(N);
5339
5340 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5341 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5342
5343 // fold operation with constant operands.
5344 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5345 // multiple results.
5346 if (N0C && N1C) {
5347 bool Overflow;
5348 APInt Result =
5349 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5350 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5351 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5352 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5353 }
5354
5355 // canonicalize constant to RHS.
5356 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5357 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5358 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5359
5360 // fold (mulo x, 0) -> 0 + no carry out
5361 if (isNullOrNullSplat(N1))
5362 return CombineTo(N, DAG.getConstant(0, DL, VT),
5363 DAG.getConstant(0, DL, CarryVT));
5364
5365 // (mulo x, 2) -> (addo x, x)
5366 // FIXME: This needs a freeze.
5367 if (N1C && N1C->getAPIntValue() == 2 &&
5368 (!IsSigned || VT.getScalarSizeInBits() > 2))
5369 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5370 N->getVTList(), N0, N0);
5371
5372 // A 1 bit SMULO overflows if both inputs are 1.
5373 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5374 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5375 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5376 DAG.getConstant(0, DL, VT), ISD::SETNE);
5377 return CombineTo(N, And, Cmp);
5378 }
5379
5380 // If it cannot overflow, transform into a mul.
5381 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5382 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5383 DAG.getConstant(0, DL, CarryVT));
5384 return SDValue();
5385}
5386
5387// Function to calculate whether the Min/Max pair of SDNodes (potentially
5388// swapped around) make a signed saturate pattern, clamping to between a signed
5389 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5390// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5391// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5392// same as SimplifySelectCC. N0<N1 ? N2 : N3.
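// Worked example: smax(smin(X, 127), -128) clamps X to the signed i8 range, so
// this returns X with BW == 8 and Unsigned == false; smin(smax(X, 0), 255)
// yields BW == 8 with Unsigned == true.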
5393 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5394 SDValue N3, ISD::CondCode CC, unsigned &BW,
5395 bool &Unsigned, SelectionDAG &DAG) {
5396 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5397 ISD::CondCode CC) {
5398 // The compare and select operand should be the same or the select operands
5399 // should be truncated versions of the comparison.
5400 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5401 return 0;
5402 // The constants need to be the same or a truncated version of each other.
5403 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5404 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5405 if (!N1C || !N3C)
5406 return 0;
5407 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5408 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5409 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5410 return 0;
5411 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5412 };
5413
5414 // Check the initial value is a SMIN/SMAX equivalent.
5415 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5416 if (!Opcode0)
5417 return SDValue();
5418
5419 // We could only need one range check, if the fptosi could never produce
5420 // the upper value.
5421 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5422 if (isNullOrNullSplat(N3)) {
5423 EVT IntVT = N0.getValueType().getScalarType();
5424 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5425 if (FPVT.isSimple()) {
5426 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5427 const fltSemantics &Semantics = InputTy->getFltSemantics();
5428 uint32_t MinBitWidth =
5429 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5430 if (IntVT.getSizeInBits() >= MinBitWidth) {
5431 Unsigned = true;
5432 BW = PowerOf2Ceil(MinBitWidth);
5433 return N0;
5434 }
5435 }
5436 }
5437 }
5438
5439 SDValue N00, N01, N02, N03;
5440 ISD::CondCode N0CC;
5441 switch (N0.getOpcode()) {
5442 case ISD::SMIN:
5443 case ISD::SMAX:
5444 N00 = N02 = N0.getOperand(0);
5445 N01 = N03 = N0.getOperand(1);
5446 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5447 break;
5448 case ISD::SELECT_CC:
5449 N00 = N0.getOperand(0);
5450 N01 = N0.getOperand(1);
5451 N02 = N0.getOperand(2);
5452 N03 = N0.getOperand(3);
5453 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5454 break;
5455 case ISD::SELECT:
5456 case ISD::VSELECT:
5457 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5458 return SDValue();
5459 N00 = N0.getOperand(0).getOperand(0);
5460 N01 = N0.getOperand(0).getOperand(1);
5461 N02 = N0.getOperand(1);
5462 N03 = N0.getOperand(2);
5463 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5464 break;
5465 default:
5466 return SDValue();
5467 }
5468
5469 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5470 if (!Opcode1 || Opcode0 == Opcode1)
5471 return SDValue();
5472
5473 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5474 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5475 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5476 return SDValue();
5477
5478 const APInt &MinC = MinCOp->getAPIntValue();
5479 const APInt &MaxC = MaxCOp->getAPIntValue();
5480 APInt MinCPlus1 = MinC + 1;
5481 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5482 BW = MinCPlus1.exactLogBase2() + 1;
5483 Unsigned = false;
5484 return N02;
5485 }
5486
5487 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5488 BW = MinCPlus1.exactLogBase2();
5489 Unsigned = true;
5490 return N02;
5491 }
5492
5493 return SDValue();
5494}
5495
5496 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5497 SDValue N3, ISD::CondCode CC,
5498 SelectionDAG &DAG) {
5499 unsigned BW;
5500 bool Unsigned;
5501 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5502 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5503 return SDValue();
5504 EVT FPVT = Fp.getOperand(0).getValueType();
5505 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5506 if (FPVT.isVector())
5507 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5508 FPVT.getVectorElementCount());
5509 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5510 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5511 return SDValue();
5512 SDLoc DL(Fp);
5513 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5514 DAG.getValueType(NewVT.getScalarType()));
5515 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5516}
5517
5518 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5519 SDValue N3, ISD::CondCode CC,
5520 SelectionDAG &DAG) {
5521 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5522 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5523 // be truncated versions of the setcc (N0/N1).
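// Illustrative example: umin(fptoui float %x to i32, 255) can become
// (zext (fp_to_uint_sat %x to i8) to i32) when the target prefers the
// saturating conversion.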
5524 if ((N0 != N2 &&
5525 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5526 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5527 return SDValue();
5528 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5529 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5530 if (!N1C || !N3C)
5531 return SDValue();
5532 const APInt &C1 = N1C->getAPIntValue();
5533 const APInt &C3 = N3C->getAPIntValue();
5534 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5535 C1 != C3.zext(C1.getBitWidth()))
5536 return SDValue();
5537
5538 unsigned BW = (C1 + 1).exactLogBase2();
5539 EVT FPVT = N0.getOperand(0).getValueType();
5540 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5541 if (FPVT.isVector())
5542 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5543 FPVT.getVectorElementCount());
5544 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5545 FPVT, NewVT))
5546 return SDValue();
5547
5548 SDValue Sat =
5549 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5550 DAG.getValueType(NewVT.getScalarType()));
5551 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5552}
5553
5554SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5555 SDValue N0 = N->getOperand(0);
5556 SDValue N1 = N->getOperand(1);
5557 EVT VT = N0.getValueType();
5558 unsigned Opcode = N->getOpcode();
5559 SDLoc DL(N);
5560
5561 // fold operation with constant operands.
5562 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5563 return C;
5564
5565 // If the operands are the same, this is a no-op.
5566 if (N0 == N1)
5567 return N0;
5568
5569 // canonicalize constant to RHS
5570 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5571 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5572 return DAG.getNode(Opcode, DL, VT, N1, N0);
5573
5574 // fold vector ops
5575 if (VT.isVector())
5576 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5577 return FoldedVOp;
5578
5579 // reassociate minmax
5580 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5581 return RMINMAX;
5582
5583 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5584 // Only do this if:
5585 // 1. The current op isn't legal and the flipped is.
5586 // 2. The saturation pattern is broken by canonicalization in InstCombine.
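// E.g. umin(smax(X, 0), 255): both operands have a known-zero sign bit, so it
// can be rewritten as smin(smax(X, 0), 255), restoring the clamp form that
// saturating-instruction matching expects.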
5587 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5588 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5589 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5590 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5591 unsigned AltOpcode;
5592 switch (Opcode) {
5593 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5594 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5595 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5596 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5597 default: llvm_unreachable("Unknown MINMAX opcode");
5598 }
5599 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5600 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5601 }
5602
5603 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5604 if (SDValue S = PerformMinMaxFpToSatCombine(
5605 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5606 return S;
5607 if (Opcode == ISD::UMIN)
5608 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5609 return S;
5610
5611 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5612 auto ReductionOpcode = [](unsigned Opcode) {
5613 switch (Opcode) {
5614 case ISD::SMIN:
5615 return ISD::VECREDUCE_SMIN;
5616 case ISD::SMAX:
5617 return ISD::VECREDUCE_SMAX;
5618 case ISD::UMIN:
5619 return ISD::VECREDUCE_UMIN;
5620 case ISD::UMAX:
5621 return ISD::VECREDUCE_UMAX;
5622 default:
5623 llvm_unreachable("Unexpected opcode");
5624 }
5625 };
5626 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5627 SDLoc(N), VT, N0, N1))
5628 return SD;
5629
5630 // Simplify the operands using demanded-bits information.
5631 if (SimplifyDemandedBits(SDValue(N, 0)))
5632 return SDValue(N, 0);
5633
5634 return SDValue();
5635}
5636
5637/// If this is a bitwise logic instruction and both operands have the same
5638/// opcode, try to sink the other opcode after the logic instruction.
5639SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5640 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5641 EVT VT = N0.getValueType();
5642 unsigned LogicOpcode = N->getOpcode();
5643 unsigned HandOpcode = N0.getOpcode();
5644 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5645 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5646
5647 // Bail early if none of these transforms apply.
5648 if (N0.getNumOperands() == 0)
5649 return SDValue();
5650
5651 // FIXME: We should check number of uses of the operands to not increase
5652 // the instruction count for all transforms.
5653
5654 // Handle size-changing casts (or sign_extend_inreg).
5655 SDValue X = N0.getOperand(0);
5656 SDValue Y = N1.getOperand(0);
5657 EVT XVT = X.getValueType();
5658 SDLoc DL(N);
5659 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5660 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5661 N0.getOperand(1) == N1.getOperand(1))) {
5662 // If both operands have other uses, this transform would create extra
5663 // instructions without eliminating anything.
5664 if (!N0.hasOneUse() && !N1.hasOneUse())
5665 return SDValue();
5666 // We need matching integer source types.
5667 if (XVT != Y.getValueType())
5668 return SDValue();
5669 // Don't create an illegal op during or after legalization. Don't ever
5670 // create an unsupported vector op.
5671 if ((VT.isVector() || LegalOperations) &&
5672 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5673 return SDValue();
5674 // Avoid infinite looping with PromoteIntBinOp.
5675 // TODO: Should we apply desirable/legal constraints to all opcodes?
5676 if ((HandOpcode == ISD::ANY_EXTEND ||
5677 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5678 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5679 return SDValue();
5680 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5681 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5682 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5683 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5684 return DAG.getNode(HandOpcode, DL, VT, Logic);
5685 }
5686
5687 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5688 if (HandOpcode == ISD::TRUNCATE) {
5689 // If both operands have other uses, this transform would create extra
5690 // instructions without eliminating anything.
5691 if (!N0.hasOneUse() && !N1.hasOneUse())
5692 return SDValue();
5693 // We need matching source types.
5694 if (XVT != Y.getValueType())
5695 return SDValue();
5696 // Don't create an illegal op during or after legalization.
5697 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5698 return SDValue();
5699 // Be extra careful sinking truncate. If it's free, there's no benefit in
5700 // widening a binop. Also, don't create a logic op on an illegal type.
5701 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5702 return SDValue();
5703 if (!TLI.isTypeLegal(XVT))
5704 return SDValue();
5705 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5706 return DAG.getNode(HandOpcode, DL, VT, Logic);
5707 }
5708
5709 // For binops SHL/SRL/SRA/AND:
5710 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5711 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5712 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5713 N0.getOperand(1) == N1.getOperand(1)) {
5714 // If either operand has other uses, this transform is not an improvement.
5715 if (!N0.hasOneUse() || !N1.hasOneUse())
5716 return SDValue();
5717 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5718 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5719 }
5720
5721 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5722 if (HandOpcode == ISD::BSWAP) {
5723 // If either operand has other uses, this transform is not an improvement.
5724 if (!N0.hasOneUse() || !N1.hasOneUse())
5725 return SDValue();
5726 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5727 return DAG.getNode(HandOpcode, DL, VT, Logic);
5728 }
5729
5730 // For funnel shifts FSHL/FSHR:
5731 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5732 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5733 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5734 N0.getOperand(2) == N1.getOperand(2)) {
5735 if (!N0.hasOneUse() || !N1.hasOneUse())
5736 return SDValue();
5737 SDValue X1 = N0.getOperand(1);
5738 SDValue Y1 = N1.getOperand(1);
5739 SDValue S = N0.getOperand(2);
5740 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5741 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5742 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5743 }
5744
5745 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5746 // Only perform this optimization up until type legalization, before
5747 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5748 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5749 // we don't want to undo this promotion.
5750 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5751 // on scalars.
5752 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5753 Level <= AfterLegalizeTypes) {
5754 // Input types must be integer and the same.
5755 if (XVT.isInteger() && XVT == Y.getValueType() &&
5756 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5757 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5758 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5759 return DAG.getNode(HandOpcode, DL, VT, Logic);
5760 }
5761 }
5762
5763 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5764 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5765 // If both shuffles use the same mask, and both shuffle within a single
5766 // vector, then it is worthwhile to move the swizzle after the operation.
5767 // The type-legalizer generates this pattern when loading illegal
5768 // vector types from memory. In many cases this allows additional shuffle
5769 // optimizations.
5770 // There are other cases where moving the shuffle after the xor/and/or
5771 // is profitable even if shuffles don't perform a swizzle.
5772 // If both shuffles use the same mask, and both shuffles have the same first
5773 // or second operand, then it might still be profitable to move the shuffle
5774 // after the xor/and/or operation.
5775 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5776 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5777 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5778 assert(X.getValueType() == Y.getValueType() &&
5779 "Inputs to shuffles are not the same type");
5780
5781 // Check that both shuffles use the same mask. The masks are known to be of
5782 // the same length because the result vector type is the same.
5783 // Check also that shuffles have only one use to avoid introducing extra
5784 // instructions.
5785 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5786 !SVN0->getMask().equals(SVN1->getMask()))
5787 return SDValue();
5788
5789 // Don't try to fold this node if it requires introducing a
5790 // build vector of all zeros that might be illegal at this stage.
5791 SDValue ShOp = N0.getOperand(1);
5792 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5793 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5794
5795 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5796 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5797 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5798 N0.getOperand(0), N1.getOperand(0));
5799 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5800 }
5801
5802 // Don't try to fold this node if it requires introducing a
5803 // build vector of all zeros that might be illegal at this stage.
5804 ShOp = N0.getOperand(0);
5805 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5806 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5807
5808 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5809 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5810 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5811 N1.getOperand(1));
5812 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5813 }
5814 }
5815
5816 return SDValue();
5817}
5818
5819/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5820SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5821 const SDLoc &DL) {
5822 SDValue LL, LR, RL, RR, N0CC, N1CC;
5823 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5824 !isSetCCEquivalent(N1, RL, RR, N1CC))
5825 return SDValue();
5826
5827 assert(N0.getValueType() == N1.getValueType() &&
5828 "Unexpected operand types for bitwise logic op");
5829 assert(LL.getValueType() == LR.getValueType() &&
5830 RL.getValueType() == RR.getValueType() &&
5831 "Unexpected operand types for setcc");
5832
5833 // If we're here post-legalization or the logic op type is not i1, the logic
5834 // op type must match a setcc result type. Also, all folds require new
5835 // operations on the left and right operands, so those types must match.
5836 EVT VT = N0.getValueType();
5837 EVT OpVT = LL.getValueType();
5838 if (LegalOperations || VT.getScalarType() != MVT::i1)
5839 if (VT != getSetCCResultType(OpVT))
5840 return SDValue();
5841 if (OpVT != RL.getValueType())
5842 return SDValue();
5843
5844 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5845 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5846 bool IsInteger = OpVT.isInteger();
5847 if (LR == RR && CC0 == CC1 && IsInteger) {
5848 bool IsZero = isNullOrNullSplat(LR);
5849 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5850
5851 // All bits clear?
5852 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5853 // All sign bits clear?
5854 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5855 // Any bits set?
5856 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5857 // Any sign bits set?
5858 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5859
5860 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5861 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5862 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5863 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5864 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5865 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5866 AddToWorklist(Or.getNode());
5867 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5868 }
5869
5870 // All bits set?
5871 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5872 // All sign bits set?
5873 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5874 // Any bits clear?
5875 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5876 // Any sign bits clear?
5877 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5878
5879 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5880 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5881 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5882 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5883 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5884 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5885 AddToWorklist(And.getNode());
5886 return DAG.getSetCC(DL, VT, And, LR, CC1);
5887 }
5888 }
5889
5890 // TODO: What is the 'or' equivalent of this fold?
5891 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5892 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5893 IsInteger && CC0 == ISD::SETNE &&
5894 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5895 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5896 SDValue One = DAG.getConstant(1, DL, OpVT);
5897 SDValue Two = DAG.getConstant(2, DL, OpVT);
5898 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5899 AddToWorklist(Add.getNode());
5900 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5901 }
5902
5903 // Try more general transforms if the predicates match and the only user of
5904 // the compares is the 'and' or 'or'.
5905 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5906 N0.hasOneUse() && N1.hasOneUse()) {
5907 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5908 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5909 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5910 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5911 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5912 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5913 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5914 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5915 }
5916
5917 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5918 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5919 // Match a shared variable operand and 2 non-opaque constant operands.
5920 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5921 // The difference of the constants must be a single bit.
5922 const APInt &CMax =
5923 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5924 const APInt &CMin =
5925 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5926 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5927 };
5928 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5929 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5930 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5931 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5932 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5933 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5934 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5935 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5936 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5937 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5938 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5939 }
5940 }
5941 }
5942
5943 // Canonicalize equivalent operands to LL == RL.
5944 if (LL == RR && LR == RL) {
5945 CC1 = ISD::getSetCCSwappedOperands(CC1);
5946 std::swap(RL, RR);
5947 }
5948
5949 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5950 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5951 if (LL == RL && LR == RR) {
5952 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5953 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5954 if (NewCC != ISD::SETCC_INVALID &&
5955 (!LegalOperations ||
5956 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5957 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5958 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5959 }
5960
5961 return SDValue();
5962}
5963
5964static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
5965 SelectionDAG &DAG) {
5966 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
5967}
5968
5969static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
5970 SelectionDAG &DAG) {
5971 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
5972}
5973
5974static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
5975 ISD::CondCode CC, unsigned OrAndOpcode,
5976 SelectionDAG &DAG,
5977 bool isFMAXNUMFMINNUM_IEEE,
5978 bool isFMAXNUMFMINNUM) {
5979 // The optimization cannot be applied for all the predicates because
5980 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
5981 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
5982 // applied at all if one of the operands is a signaling NaN.
5983
5984 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
5985 // are non NaN values.
5986 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
5987 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
5988 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5989 isFMAXNUMFMINNUM_IEEE
5990 ? ISD::FMINNUM_IEEE
5991 : ISD::DELETED_NODE;
5992 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
5993 (OrAndOpcode == ISD::OR)) ||
5994 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
5995 (OrAndOpcode == ISD::AND)))
5996 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5997 isFMAXNUMFMINNUM_IEEE
5998 ? ISD::FMAXNUM_IEEE
5999 : ISD::DELETED_NODE;
6000 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6001 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6002 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6003 // that there are not any sNaNs, then the optimization is not valid
6004 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6005 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6006 // we can prove that we do not have any sNaNs, then we can do the
6007 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6008 // cases.
6009 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6010 (OrAndOpcode == ISD::OR)) ||
6011 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6012 (OrAndOpcode == ISD::AND)))
6013 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6014 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6015 isFMAXNUMFMINNUM_IEEE
6016 ? ISD::FMINNUM_IEEE
6017 : ISD::DELETED_NODE;
6018 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6019 (OrAndOpcode == ISD::OR)) ||
6020 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6021 (OrAndOpcode == ISD::AND)))
6022 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6023 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6024 isFMAXNUMFMINNUM_IEEE
6025 ? ISD::FMAXNUM_IEEE
6026 : ISD::DELETED_NODE;
6027 return ISD::DELETED_NODE;
6028}
6029
6030
6031 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6032 assert(
6033 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6034 "Invalid Op to combine SETCC with");
6035
6036 // TODO: Search past casts/truncates.
6037 SDValue LHS = LogicOp->getOperand(0);
6038 SDValue RHS = LogicOp->getOperand(1);
6039 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6040 !LHS->hasOneUse() || !RHS->hasOneUse())
6041 return SDValue();
6042
6043 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6044 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6045 LogicOp, LHS.getNode(), RHS.getNode());
6046
6047 SDValue LHS0 = LHS->getOperand(0);
6048 SDValue RHS0 = RHS->getOperand(0);
6049 SDValue LHS1 = LHS->getOperand(1);
6050 SDValue RHS1 = RHS->getOperand(1);
6051 // TODO: We don't actually need a splat here, for vectors we just need the
6052 // invariants to hold for each element.
6053 auto *LHS1C = isConstOrConstSplat(LHS1);
6054 auto *RHS1C = isConstOrConstSplat(RHS1);
6055 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6056 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6057 EVT VT = LogicOp->getValueType(0);
6058 EVT OpVT = LHS0.getValueType();
6059 SDLoc DL(LogicOp);
6060
6061 // Check if the operands of an and/or operation are comparisons and if they
6062 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6063 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6064 // sequence will be replaced with min-cmp sequence:
6065 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6066 // and and-cmp-cmp will be replaced with max-cmp sequence:
6067 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6068 // The optimization does not work for `==` or `!=` .
6069 // The two comparisons should have either the same predicate or the
6070 // predicate of one of the comparisons is the opposite of the other one.
6071 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6072 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6073 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6074 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6075 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6076 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6077 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6078 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6079 (OpVT.isFloatingPoint() &&
6080 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6081 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6082 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6083 CCL != ISD::SETTRUE &&
6084 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6085
6086 SDValue CommonValue, Operand1, Operand2;
6087 ISD::CondCode CC = ISD::SETCC_INVALID;
6088 if (CCL == CCR) {
6089 if (LHS0 == RHS0) {
6090 CommonValue = LHS0;
6091 Operand1 = LHS1;
6092 Operand2 = RHS1;
6093 CC = CCL;
6094 } else if (LHS1 == RHS1) {
6095 CommonValue = LHS1;
6096 Operand1 = LHS0;
6097 Operand2 = RHS0;
6098 CC = CCL;
6099 }
6100 } else {
6101 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6102 if (LHS0 == RHS1) {
6103 CommonValue = LHS0;
6104 Operand1 = LHS1;
6105 Operand2 = RHS0;
6106 CC = CCR;
6107 } else if (RHS0 == LHS1) {
6108 CommonValue = LHS1;
6109 Operand1 = LHS0;
6110 Operand2 = RHS1;
6111 CC = CCL;
6112 }
6113 }
6114
6115 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6116 // handle it using OR/AND.
6117 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6118 CC = ISD::SETCC_INVALID;
6119 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6120 CC = ISD::SETCC_INVALID;
6121
6122 if (CC != ISD::SETCC_INVALID) {
6123 unsigned NewOpcode = ISD::DELETED_NODE;
6124 bool IsSigned = isSignedIntSetCC(CC);
6125 if (OpVT.isInteger()) {
6126 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6127 CC == ISD::SETLT || CC == ISD::SETULT);
6128 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6129 if (IsLess == IsOr)
6130 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6131 else
6132 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6133 } else if (OpVT.isFloatingPoint())
6134 NewOpcode =
6135 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6136 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6137
6138 if (NewOpcode != ISD::DELETED_NODE) {
6139 SDValue MinMaxValue =
6140 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6141 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6142 }
6143 }
6144 }
6145
6146 if (TargetPreference == AndOrSETCCFoldKind::None)
6147 return SDValue();
6148
6149 if (CCL == CCR &&
6150 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6151 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6152 const APInt &APLhs = LHS1C->getAPIntValue();
6153 const APInt &APRhs = RHS1C->getAPIntValue();
6154
6155 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6156 // case this is just a compare).
6157 if (APLhs == (-APRhs) &&
6158 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6159 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6160 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6161 // (icmp eq A, C) | (icmp eq A, -C)
6162 // -> (icmp eq Abs(A), C)
6163 // (icmp ne A, C) & (icmp ne A, -C)
6164 // -> (icmp ne Abs(A), C)
6165 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6166 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6167 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6168 } else if (TargetPreference &
6169 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6170
6171 // AndOrSETCCFoldKind::AddAnd:
6172 // A == C0 | A == C1
6173 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6174 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6175 // A != C0 & A != C1
6176 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6177 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6178
6179 // AndOrSETCCFoldKind::NotAnd:
6180 // A == C0 | A == C1
6181 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6182 // -> ~A & smin(C0, C1) == 0
6183 // A != C0 & A != C1
6184 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6185 // -> ~A & smin(C0, C1) != 0
6186
6187 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6188 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6189 APInt Dif = MaxC - MinC;
6190 if (!Dif.isZero() && Dif.isPowerOf2()) {
6191 if (MaxC.isAllOnes() &&
6192 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6193 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6194 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6195 DAG.getConstant(MinC, DL, OpVT));
6196 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6197 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6198 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6199
6200 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6201 DAG.getConstant(-MinC, DL, OpVT));
6202 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6203 DAG.getConstant(~Dif, DL, OpVT));
6204 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6205 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6206 }
6207 }
6208 }
6209 }
6210
6211 return SDValue();
6212}
6213
6214// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6215// We canonicalize to the `select` form in the middle end, but the `and` form
6216 // gets better codegen on all tested targets (arm, x86, riscv).
6217 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6218 const SDLoc &DL, SelectionDAG &DAG) {
6219 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6220 if (!isNullConstant(F))
6221 return SDValue();
6222
6223 EVT CondVT = Cond.getValueType();
6224 if (TLI.getBooleanContents(CondVT) !=
6225 TargetLoweringBase::ZeroOrOneBooleanContent)
6226 return SDValue();
6227
6228 if (T.getOpcode() != ISD::AND)
6229 return SDValue();
6230
6231 if (!isOneConstant(T.getOperand(1)))
6232 return SDValue();
6233
6234 EVT OpVT = T.getValueType();
6235
6236 SDValue CondMask =
6237 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6238 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6239}
6240
6241/// This contains all DAGCombine rules which reduce two values combined by
6242/// an And operation to a single value. This makes them reusable in the context
6243/// of visitSELECT(). Rules involving constants are not included as
6244/// visitSELECT() already handles those cases.
6245SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6246 EVT VT = N1.getValueType();
6247 SDLoc DL(N);
6248
6249 // fold (and x, undef) -> 0
6250 if (N0.isUndef() || N1.isUndef())
6251 return DAG.getConstant(0, DL, VT);
6252
6253 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6254 return V;
6255
6256 // Canonicalize:
6257 // and(x, add) -> and(add, x)
6258 if (N1.getOpcode() == ISD::ADD)
6259 std::swap(N0, N1);
6260
6261 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6262 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6263 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6264 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6265 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6266 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6267 // immediate for an add, but it is legal if its top c2 bits are set,
6268 // transform the ADD so the immediate doesn't need to be materialized
6269 // in a register.
6270 APInt ADDC = ADDI->getAPIntValue();
6271 APInt SRLC = SRLI->getAPIntValue();
6272 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6273 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6274 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6275 SRLC.getZExtValue());
6276 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6277 ADDC |= Mask;
6278 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6279 SDLoc DL0(N0);
6280 SDValue NewAdd =
6281 DAG.getNode(ISD::ADD, DL0, VT,
6282 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6283 CombineTo(N0.getNode(), NewAdd);
6284 // Return N so it doesn't get rechecked!
6285 return SDValue(N, 0);
6286 }
6287 }
6288 }
6289 }
6290 }
6291 }
6292
6293 return SDValue();
6294}
6295
6296bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6297 EVT LoadResultTy, EVT &ExtVT) {
6298 if (!AndC->getAPIntValue().isMask())
6299 return false;
6300
6301 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6302
6303 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6304 EVT LoadedVT = LoadN->getMemoryVT();
6305
6306 if (ExtVT == LoadedVT &&
6307 (!LegalOperations ||
6308 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6309 // ZEXTLOAD will match without needing to change the size of the value being
6310 // loaded.
6311 return true;
6312 }
6313
6314 // Do not change the width of a volatile or atomic load.
6315 if (!LoadN->isSimple())
6316 return false;
6317
6318 // Do not generate loads of non-round integer types since these can
6319 // be expensive (and would be wrong if the type is not byte sized).
6320 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6321 return false;
6322
6323 if (LegalOperations &&
6324 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6325 return false;
6326
6327 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6328 return false;
6329
6330 return true;
6331}
6332
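// Returns true if the memory access of LDST (a load or store) can legally be
// narrowed to MemVT at a bit offset of ShAmt (which must be a byte multiple).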
6333bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6334 ISD::LoadExtType ExtType, EVT &MemVT,
6335 unsigned ShAmt) {
6336 if (!LDST)
6337 return false;
6338 // Only allow byte offsets.
6339 if (ShAmt % 8)
6340 return false;
6341
6342 // Do not generate loads of non-round integer types since these can
6343 // be expensive (and would be wrong if the type is not byte sized).
6344 if (!MemVT.isRound())
6345 return false;
6346
6347 // Don't change the width of a volatile or atomic load.
6348 if (!LDST->isSimple())
6349 return false;
6350
6351 EVT LdStMemVT = LDST->getMemoryVT();
6352
6353 // Bail out when changing the scalable property, since we can't be sure that
6354 // we're actually narrowing here.
6355 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6356 return false;
6357
6358 // Verify that we are actually reducing a load width here.
6359 if (LdStMemVT.bitsLT(MemVT))
6360 return false;
6361
6362 // Ensure that this isn't going to produce an unsupported memory access.
6363 if (ShAmt) {
6364 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6365 const unsigned ByteShAmt = ShAmt / 8;
6366 const Align LDSTAlign = LDST->getAlign();
6367 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6368 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6369 LDST->getAddressSpace(), NarrowAlign,
6370 LDST->getMemOperand()->getFlags()))
6371 return false;
6372 }
6373
6374 // It's not possible to generate a constant of extended or untyped type.
6375 EVT PtrType = LDST->getBasePtr().getValueType();
6376 if (PtrType == MVT::Untyped || PtrType.isExtended())
6377 return false;
6378
6379 if (isa<LoadSDNode>(LDST)) {
6380 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6381 // Don't transform one with multiple uses; this would require adding a new
6382 // load.
6383 if (!SDValue(Load, 0).hasOneUse())
6384 return false;
6385
6386 if (LegalOperations &&
6387 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6388 return false;
6389
6390 // For the transform to be legal, the load must produce only two values
6391 // (the value loaded and the chain). Don't transform a pre-increment
6392 // load, for example, which produces an extra value. Otherwise the
6393 // transformation is not equivalent, and the downstream logic to replace
6394 // uses gets things wrong.
6395 if (Load->getNumValues() > 2)
6396 return false;
6397
6398 // If the load that we're shrinking is an extload and we're not just
6399 // discarding the extension we can't simply shrink the load. Bail.
6400 // TODO: It would be possible to merge the extensions in some cases.
6401 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6402 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6403 return false;
6404
6405 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6406 return false;
6407 } else {
6408 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6409 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6410 // Can't write outside the original store
6411 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6412 return false;
6413
6414 if (LegalOperations &&
6415 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6416 return false;
6417 }
6418 return true;
6419}
6420
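// Recursively walk the operands of N (a tree of AND/OR/XOR nodes) looking for
// loads that can be narrowed to the width implied by Mask. Nodes whose
// constant operand has bits outside the mask are recorded in NodesWithConsts,
// and at most one other node may require explicit masking (NodeToMask).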
6421 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6422 SmallVectorImpl<LoadSDNode*> &Loads,
6423 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6424 ConstantSDNode *Mask,
6425 SDNode *&NodeToMask) {
6426 // Recursively search for the operands, looking for loads which can be
6427 // narrowed.
6428 for (SDValue Op : N->op_values()) {
6429 if (Op.getValueType().isVector())
6430 return false;
6431
6432 // Some constants may need fixing up later if they are too large.
6433 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6434 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6435 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6436 NodesWithConsts.insert(N);
6437 continue;
6438 }
6439
6440 if (!Op.hasOneUse())
6441 return false;
6442
6443 switch(Op.getOpcode()) {
6444 case ISD::LOAD: {
6445 auto *Load = cast<LoadSDNode>(Op);
6446 EVT ExtVT;
6447 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6448 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6449
6450 // ZEXTLOAD is already small enough.
6451 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6452 ExtVT.bitsGE(Load->getMemoryVT()))
6453 continue;
6454
6455 // Use LE to convert equal sized loads to zext.
6456 if (ExtVT.bitsLE(Load->getMemoryVT()))
6457 Loads.push_back(Load);
6458
6459 continue;
6460 }
6461 return false;
6462 }
6463 case ISD::ZERO_EXTEND:
6464 case ISD::AssertZext: {
6465 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6466 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6467 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6468 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6469 Op.getOperand(0).getValueType();
6470
6471 // We can accept extending nodes if the mask is wider or an equal
6472 // width to the original type.
6473 if (ExtVT.bitsGE(VT))
6474 continue;
6475 break;
6476 }
6477 case ISD::OR:
6478 case ISD::XOR:
6479 case ISD::AND:
6480 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6481 NodeToMask))
6482 return false;
6483 continue;
6484 }
6485
6486 // Allow one node which will be masked along with any loads found.
6487 if (NodeToMask)
6488 return false;
6489
6490 // Also ensure that the node to be masked only produces one data result.
6491 NodeToMask = Op.getNode();
6492 if (NodeToMask->getNumValues() > 1) {
6493 bool HasValue = false;
6494 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6495 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6496 if (VT != MVT::Glue && VT != MVT::Other) {
6497 if (HasValue) {
6498 NodeToMask = nullptr;
6499 return false;
6500 }
6501 HasValue = true;
6502 }
6503 }
6504 assert(HasValue && "Node to be masked has no data result?");
6505 }
6506 }
6507 return true;
6508}
6509
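// Try to push the AND mask of N back through a tree of OR/XOR/AND nodes to the
// loads feeding it. If every leaf can either absorb the mask (narrowed loads,
// shrunken constants) or be masked explicitly, the AND itself becomes
// redundant and is removed.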
6510bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6511 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6512 if (!Mask)
6513 return false;
6514
6515 if (!Mask->getAPIntValue().isMask())
6516 return false;
6517
6518 // No need to do anything if the and directly uses a load.
6519 if (isa<LoadSDNode>(N->getOperand(0)))
6520 return false;
6521
6522 SmallVector<LoadSDNode*, 8> Loads;
6523 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6524 SDNode *FixupNode = nullptr;
6525 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6526 if (Loads.empty())
6527 return false;
6528
6529 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6530 SDValue MaskOp = N->getOperand(1);
6531
6532 // If it exists, fixup the single node we allow in the tree that needs
6533 // masking.
6534 if (FixupNode) {
6535 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6536 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6537 FixupNode->getValueType(0),
6538 SDValue(FixupNode, 0), MaskOp);
6539 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6540 if (And.getOpcode() == ISD::AND)
6541 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6542 }
6543
6544 // Narrow any constants that need it.
6545 for (auto *LogicN : NodesWithConsts) {
6546 SDValue Op0 = LogicN->getOperand(0);
6547 SDValue Op1 = LogicN->getOperand(1);
6548
6549 if (isa<ConstantSDNode>(Op0))
6550 Op0 =
6551 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6552
6553 if (isa<ConstantSDNode>(Op1))
6554 Op1 =
6555 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6556
6557 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6558 std::swap(Op0, Op1);
6559
6560 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6561 }
6562
6563 // Create narrow loads.
6564 for (auto *Load : Loads) {
6565 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6566 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6567 SDValue(Load, 0), MaskOp);
6568 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6569 if (And.getOpcode() == ISD::AND)
6570 And = SDValue(
6571 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6572 SDValue NewLoad = reduceLoadWidth(And.getNode());
6573 assert(NewLoad &&
6574 "Shouldn't be masking the load if it can't be narrowed");
6575 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6576 }
6577 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6578 return true;
6579 }
6580 return false;
6581}
6582
6583// Unfold
6584// x & (-1 'logical shift' y)
6585// To
6586// (x 'opposite logical shift' y) 'logical shift' y
6587// if it is better for performance.
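// For example:
//   x & (-1 << y)  -->  (x >> y) << y
//   x & (-1 >> y)  -->  (x << y) >> y
// i.e. clear the low (resp. high) y bits of x without materializing the mask.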
6588SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6589 assert(N->getOpcode() == ISD::AND);
6590
6591 SDValue N0 = N->getOperand(0);
6592 SDValue N1 = N->getOperand(1);
6593
6594 // Do we actually prefer shifts over mask?
6595 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6596 return SDValue();
6597
6598 // Try to match (-1 '[outer] logical shift' y)
6599 unsigned OuterShift;
6600 unsigned InnerShift; // The opposite direction to the OuterShift.
6601 SDValue Y; // Shift amount.
6602 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6603 if (!M.hasOneUse())
6604 return false;
6605 OuterShift = M->getOpcode();
6606 if (OuterShift == ISD::SHL)
6607 InnerShift = ISD::SRL;
6608 else if (OuterShift == ISD::SRL)
6609 InnerShift = ISD::SHL;
6610 else
6611 return false;
6612 if (!isAllOnesConstant(M->getOperand(0)))
6613 return false;
6614 Y = M->getOperand(1);
6615 return true;
6616 };
6617
6618 SDValue X;
6619 if (matchMask(N1))
6620 X = N0;
6621 else if (matchMask(N0))
6622 X = N1;
6623 else
6624 return SDValue();
6625
6626 SDLoc DL(N);
6627 EVT VT = N->getValueType(0);
6628
6629 // tmp = x 'opposite logical shift' y
6630 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6631 // ret = tmp 'logical shift' y
6632 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6633
6634 return T1;
6635}
6636
6637/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6638/// For a target with a bit test, this is expected to become test + set and save
6639 /// at least 1 instruction.
6640 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6641 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6642
6643 // Look through an optional extension.
6644 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6645 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6646 And0 = And0.getOperand(0);
6647 if (!isOneConstant(And1) || !And0.hasOneUse())
6648 return SDValue();
6649
6650 SDValue Src = And0;
6651
6652 // Attempt to find a 'not' op.
6653 // TODO: Should we favor test+set even without the 'not' op?
6654 bool FoundNot = false;
6655 if (isBitwiseNot(Src)) {
6656 FoundNot = true;
6657 Src = Src.getOperand(0);
6658
6659 // Look though an optional truncation. The source operand may not be the
6660 // same type as the original 'and', but that is ok because we are masking
6661 // off everything but the low bit.
6662 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6663 Src = Src.getOperand(0);
6664 }
6665
6666 // Match a shift-right by constant.
6667 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6668 return SDValue();
6669
6670 // This is probably not worthwhile without a supported type.
6671 EVT SrcVT = Src.getValueType();
6672 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6673 if (!TLI.isTypeLegal(SrcVT))
6674 return SDValue();
6675
6676 // We might have looked through casts that make this transform invalid.
6677 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6678 SDValue ShiftAmt = Src.getOperand(1);
6679 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6680 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6681 return SDValue();
6682
6683 // Set source to shift source.
6684 Src = Src.getOperand(0);
6685
6686 // Try again to find a 'not' op.
6687 // TODO: Should we favor test+set even with two 'not' ops?
6688 if (!FoundNot) {
6689 if (!isBitwiseNot(Src))
6690 return SDValue();
6691 Src = Src.getOperand(0);
6692 }
6693
6694 if (!TLI.hasBitTest(Src, ShiftAmt))
6695 return SDValue();
6696
6697 // Turn this into a bit-test pattern using mask op + setcc:
6698 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6699 // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
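// e.g. with C == 3, testing that bit 3 of X is clear becomes
// (setcc (and X, 8), 0, eq).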
6700 SDLoc DL(And);
6701 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6702 EVT CCVT =
6703 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6704 SDValue Mask = DAG.getConstant(
6705 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6706 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6707 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6708 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6709 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6710}
6711
6712/// For targets that support usubsat, match a bit-hack form of that operation
6713 /// that ends in 'and' and convert it.
6714 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6715 EVT VT = N->getValueType(0);
6716 unsigned BitWidth = VT.getScalarSizeInBits();
6717 APInt SignMask = APInt::getSignMask(BitWidth);
6718
6719 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6720 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6721 // xor/add with SMIN (signmask) are logically equivalent.
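// For example, with i8 X = 200 (0xC8): X s>> 7 is 0xFF and X ^ 0x80 is 72,
// so the 'and' yields 72 == usubsat(200, 128). With i8 X = 5: X s>> 7 is 0,
// so the 'and' yields 0 == usubsat(5, 128).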
6722 SDValue X;
6723 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6724 m_OneUse(m_Sra(m_Deferred(X),
6725 m_SpecificInt(BitWidth - 1))))) &&
6726 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6727 m_OneUse(m_Sra(m_Deferred(X),
6728 m_SpecificInt(BitWidth - 1))))))
6729 return SDValue();
6730
6731 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6732 DAG.getConstant(SignMask, DL, VT));
6733}
6734
6735/// Given a bitwise logic operation N with a matching bitwise logic operand,
6736/// fold a pattern where 2 of the source operands are identically shifted
6737/// values. For example:
6738 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6739 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6740 SelectionDAG &DAG) {
6741 unsigned LogicOpcode = N->getOpcode();
6742 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6743 "Expected bitwise logic operation");
6744
6745 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6746 return SDValue();
6747
6748 // Match another bitwise logic op and a shift.
6749 unsigned ShiftOpcode = ShiftOp.getOpcode();
6750 if (LogicOp.getOpcode() != LogicOpcode ||
6751 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6752 ShiftOpcode == ISD::SRA))
6753 return SDValue();
6754
6755 // Match another shift op inside the first logic operand. Handle both commuted
6756 // possibilities.
6757 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6758 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6759 SDValue X1 = ShiftOp.getOperand(0);
6760 SDValue Y = ShiftOp.getOperand(1);
6761 SDValue X0, Z;
6762 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6763 LogicOp.getOperand(0).getOperand(1) == Y) {
6764 X0 = LogicOp.getOperand(0).getOperand(0);
6765 Z = LogicOp.getOperand(1);
6766 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6767 LogicOp.getOperand(1).getOperand(1) == Y) {
6768 X0 = LogicOp.getOperand(1).getOperand(0);
6769 Z = LogicOp.getOperand(0);
6770 } else {
6771 return SDValue();
6772 }
6773
6774 EVT VT = N->getValueType(0);
6775 SDLoc DL(N);
6776 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6777 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6778 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6779}
6780
6781/// Given a tree of logic operations with shape like
6782/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6783/// try to match and fold shift operations with the same shift amount.
6784/// For example:
6785/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6786 /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6787 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6788 SDValue RightHand, SelectionDAG &DAG) {
6789 unsigned LogicOpcode = N->getOpcode();
6790 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6791 "Expected bitwise logic operation");
6792 if (LeftHand.getOpcode() != LogicOpcode ||
6793 RightHand.getOpcode() != LogicOpcode)
6794 return SDValue();
6795 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6796 return SDValue();
6797
6798 // Try to match one of following patterns:
6799 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6800 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6801 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6802 // itself.
6803 SDValue CombinedShifts, W;
6804 SDValue R0 = RightHand.getOperand(0);
6805 SDValue R1 = RightHand.getOperand(1);
6806 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6807 W = R1;
6808 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6809 W = R0;
6810 else
6811 return SDValue();
6812
6813 EVT VT = N->getValueType(0);
6814 SDLoc DL(N);
6815 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6816}
6817
6818SDValue DAGCombiner::visitAND(SDNode *N) {
6819 SDValue N0 = N->getOperand(0);
6820 SDValue N1 = N->getOperand(1);
6821 EVT VT = N1.getValueType();
6822 SDLoc DL(N);
6823
6824 // x & x --> x
6825 if (N0 == N1)
6826 return N0;
6827
6828 // fold (and c1, c2) -> c1&c2
6829 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6830 return C;
6831
6832 // canonicalize constant to RHS
6833 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6834 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6835 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6836
6837 if (areBitwiseNotOfEachother(N0, N1))
6838 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6839
6840 // fold vector ops
6841 if (VT.isVector()) {
6842 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6843 return FoldedVOp;
6844
6845 // fold (and x, 0) -> 0, vector edition
6846 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6847 // do not return N1, because undef node may exist in N1
6848 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6849 N1.getValueType());
6850
6851 // fold (and x, -1) -> x, vector edition
6852 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6853 return N0;
6854
6855 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6856 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6857 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6858 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6859 N1.hasOneUse()) {
6860 EVT LoadVT = MLoad->getMemoryVT();
6861 EVT ExtVT = VT;
6862 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6863 // For this AND to be a zero extension of the masked load the elements
6864 // of the BuildVec must mask the bottom bits of the extended element
6865 // type
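// e.g. an EXTLOAD from v4i16 memory into v4i32 ANDed with splat(0xFFFF)
// is exactly a v4i16 -> v4i32 ZEXTLOAD.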
6866 uint64_t ElementSize =
6867 LoadVT.getVectorElementType().getScalarSizeInBits();
6868 if (Splat->getAPIntValue().isMask(ElementSize)) {
6869 SDValue NewLoad = DAG.getMaskedLoad(
6870 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6871 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6872 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6873 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6874 bool LoadHasOtherUsers = !N0.hasOneUse();
6875 CombineTo(N, NewLoad);
6876 if (LoadHasOtherUsers)
6877 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6878 return SDValue(N, 0);
6879 }
6880 }
6881 }
6882 }
6883
6884 // fold (and x, -1) -> x
6885 if (isAllOnesConstant(N1))
6886 return N0;
6887
6888 // if (and x, c) is known to be zero, return 0
6889 unsigned BitWidth = VT.getScalarSizeInBits();
6890 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6891 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6892 return DAG.getConstant(0, DL, VT);
6893
6894 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6895 return R;
6896
6897 if (SDValue NewSel = foldBinOpIntoSelect(N))
6898 return NewSel;
6899
6900 // reassociate and
6901 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
6902 return RAND;
6903
6904 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
6905 if (SDValue SD =
6906 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
6907 return SD;
6908
6909 // fold (and (or x, C), D) -> D if (C & D) == D
6910 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6911 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6912 };
6913 if (N0.getOpcode() == ISD::OR &&
6914 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6915 return N1;
6916
6917 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6918 SDValue N0Op0 = N0.getOperand(0);
6919 EVT SrcVT = N0Op0.getValueType();
6920 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
6921 APInt Mask = ~N1C->getAPIntValue();
6922 Mask = Mask.trunc(SrcBitWidth);
6923
6924 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6925 if (DAG.MaskedValueIsZero(N0Op0, Mask))
6926 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
6927
6928 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
6929 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
6930 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
6931 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
6932 TLI.isNarrowingProfitable(VT, SrcVT))
6933 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
6934 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
6935 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
6936 }
6937
6938 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
6939 if (ISD::isExtOpcode(N0.getOpcode())) {
6940 unsigned ExtOpc = N0.getOpcode();
6941 SDValue N0Op0 = N0.getOperand(0);
6942 if (N0Op0.getOpcode() == ISD::AND &&
6943 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
6944 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
6945 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
6946 N0->hasOneUse() && N0Op0->hasOneUse()) {
6947 SDValue NewMask =
6948 DAG.getNode(ISD::AND, DL, VT, N1,
6949 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
6950 return DAG.getNode(ISD::AND, DL, VT,
6951 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
6952 NewMask);
6953 }
6954 }
6955
6956 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6957 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6958 // already be zero by virtue of the width of the base type of the load.
6959 //
6960 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6961 // more cases.
6962 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6964 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6965 N0.getOperand(0).getResNo() == 0) ||
6966 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6967 auto *Load =
6968 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
6969
6970 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6971 // This can be a pure constant or a vector splat, in which case we treat the
6972 // vector as a scalar and use the splat value.
6973 APInt Constant = APInt::getZero(1);
6974 if (const ConstantSDNode *C = isConstOrConstSplat(
6975 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6976 Constant = C->getAPIntValue();
6977 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6978 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6979 APInt SplatValue, SplatUndef;
6980 unsigned SplatBitSize;
6981 bool HasAnyUndefs;
6982 // Endianness should not matter here. Code below makes sure that we only
6983 // use the result if the SplatBitSize is a multiple of the vector element
6984 // size. And after that we AND all element sized parts of the splat
6985 // together. So the end result should be the same regardless of in which
6986 // order we do those operations.
6987 const bool IsBigEndian = false;
6988 bool IsSplat =
6989 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
6990 HasAnyUndefs, EltBitWidth, IsBigEndian);
6991
6992 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6993 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6994 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
6995 // Undef bits can contribute to a possible optimisation if set, so
6996 // set them.
6997 SplatValue |= SplatUndef;
6998
6999 // The splat value may be something like "0x00FFFFFF", which means 0 for
7000 // the first vector value and FF for the rest, repeating. We need a mask
7001 // that will apply equally to all members of the vector, so AND all the
7002 // lanes of the constant together.
7003 Constant = APInt::getAllOnes(EltBitWidth);
7004 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7005 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7006 }
7007 }
7008
7009 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7010 // actually legal and isn't going to get expanded, else this is a false
7011 // optimisation.
7012 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7013 Load->getValueType(0),
7014 Load->getMemoryVT());
7015
7016 // Resize the constant to the same size as the original memory access before
7017 // extension. If it is still the AllOnesValue then this AND is completely
7018 // unneeded.
7019 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7020
7021 bool B;
7022 switch (Load->getExtensionType()) {
7023 default: B = false; break;
7024 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7025 case ISD::ZEXTLOAD:
7026 case ISD::NON_EXTLOAD: B = true; break;
7027 }
7028
7029 if (B && Constant.isAllOnes()) {
7030 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7031 // preserve semantics once we get rid of the AND.
7032 SDValue NewLoad(Load, 0);
7033
7034 // Fold the AND away. NewLoad may get replaced immediately.
7035 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7036
7037 if (Load->getExtensionType() == ISD::EXTLOAD) {
7038 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7039 Load->getValueType(0), SDLoc(Load),
7040 Load->getChain(), Load->getBasePtr(),
7041 Load->getOffset(), Load->getMemoryVT(),
7042 Load->getMemOperand());
7043 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7044 if (Load->getNumValues() == 3) {
7045 // PRE/POST_INC loads have 3 values.
7046 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7047 NewLoad.getValue(2) };
7048 CombineTo(Load, To, 3, true);
7049 } else {
7050 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7051 }
7052 }
7053
7054 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7055 }
7056 }
7057
7058 // Try to convert a constant mask AND into a shuffle clear mask.
7059 if (VT.isVector())
7060 if (SDValue Shuffle = XformToShuffleWithZero(N))
7061 return Shuffle;
7062
7063 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7064 return Combined;
7065
7066 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7067 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7068 SDValue Ext = N0.getOperand(0);
7069 EVT ExtVT = Ext->getValueType(0);
7070 SDValue Extendee = Ext->getOperand(0);
7071
7072 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7073 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7074 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7075 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7076 // => (extract_subvector (iN_zeroext v))
7077 SDValue ZeroExtExtendee =
7078 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7079
7080 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7081 N0.getOperand(1));
7082 }
7083 }
7084
7085 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7086 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7087 EVT MemVT = GN0->getMemoryVT();
7088 EVT ScalarVT = MemVT.getScalarType();
7089
7090 if (SDValue(GN0, 0).hasOneUse() &&
7091 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7092 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
7093 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7094 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7095
7096 SDValue ZExtLoad = DAG.getMaskedGather(
7097 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7098 GN0->getIndexType(), ISD::ZEXTLOAD);
7099
7100 CombineTo(N, ZExtLoad);
7101 AddToWorklist(ZExtLoad.getNode());
7102 // Avoid recheck of N.
7103 return SDValue(N, 0);
7104 }
7105 }
7106
7107 // fold (and (load x), 255) -> (zextload x, i8)
7108 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7109 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7110 if (SDValue Res = reduceLoadWidth(N))
7111 return Res;
7112
7113 if (LegalTypes) {
7114 // Attempt to propagate the AND back up to the leaves which, if they're
7115 // loads, can be combined to narrow loads and the AND node can be removed.
7116 // Perform after legalization so that extend nodes will already be
7117 // combined into the loads.
7118 if (BackwardsPropagateMask(N))
7119 return SDValue(N, 0);
7120 }
7121
7122 if (SDValue Combined = visitANDLike(N0, N1, N))
7123 return Combined;
7124
7125 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7126 if (N0.getOpcode() == N1.getOpcode())
7127 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7128 return V;
7129
7130 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7131 return R;
7132 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7133 return R;
7134
7135 // Masking the negated extension of a boolean is just the zero-extended
7136 // boolean:
7137 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7138 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7139 //
7140 // Note: the SimplifyDemandedBits fold below can make an information-losing
7141 // transform, and then we have no way to find this better fold.
7142 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7143 if (isNullOrNullSplat(N0.getOperand(0))) {
7144 SDValue SubRHS = N0.getOperand(1);
7145 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7146 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7147 return SubRHS;
7148 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7149 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7150 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7151 }
7152 }
7153
7154 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7155 // fold (and (sra)) -> (and (srl)) when possible.
7156 if (SimplifyDemandedBits(SDValue(N, 0)))
7157 return SDValue(N, 0);
7158
7159 // fold (zext_inreg (extload x)) -> (zextload x)
7160 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7161 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7162 (ISD::isEXTLoad(N0.getNode()) ||
7163 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7164 auto *LN0 = cast<LoadSDNode>(N0);
7165 EVT MemVT = LN0->getMemoryVT();
7166 // If we zero all the possible extended bits, then we can turn this into
7167 // a zextload if we are running before legalize or the operation is legal.
7168 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7169 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7170 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7171 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7172 ((!LegalOperations && LN0->isSimple()) ||
7173 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7174 SDValue ExtLoad =
7175 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7176 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7177 AddToWorklist(N);
7178 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7179 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7180 }
7181 }
7182
7183 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7184 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7185 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7186 N0.getOperand(1), false))
7187 return BSwap;
7188 }
7189
7190 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7191 return Shifts;
7192
7193 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7194 return V;
7195
7196 // Recognize the following pattern:
7197 //
7198 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7199 //
7200 // where bitmask is a mask that clears the upper bits of AndVT. The
7201 // number of bits in bitmask must be a power of two.
7202 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7203 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7204 return false;
7205
7206 auto *C = dyn_cast<ConstantSDNode>(RHS);
7207 if (!C)
7208 return false;
7209
7210 if (!C->getAPIntValue().isMask(
7211 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7212 return false;
7213
7214 return true;
7215 };
7216
7217 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7218 if (IsAndZeroExtMask(N0, N1))
7219 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7220
7221 if (hasOperation(ISD::USUBSAT, VT))
7222 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7223 return V;
7224
7225 // Postpone until legalization completed to avoid interference with bswap
7226 // folding
7227 if (LegalOperations || VT.isVector())
7228 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7229 return R;
7230
7231 return SDValue();
7232}
7233
7234/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
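/// For example, for i32 a = 0xAABBCCDD:
///   ((a >> 8) & 0xFF) | ((a << 8) & 0xFF00) == 0xDDCC == (bswap a) >> 16.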
7235SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7236 bool DemandHighBits) {
7237 if (!LegalOperations)
7238 return SDValue();
7239
7240 EVT VT = N->getValueType(0);
7241 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7242 return SDValue();
7243 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7244 return SDValue();
7245
7246 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7247 bool LookPassAnd0 = false;
7248 bool LookPassAnd1 = false;
7249 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7250 std::swap(N0, N1);
7251 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7252 std::swap(N0, N1);
7253 if (N0.getOpcode() == ISD::AND) {
7254 if (!N0->hasOneUse())
7255 return SDValue();
7256 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7257 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7258 // This is needed for X86.
7259 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7260 N01C->getZExtValue() != 0xFFFF))
7261 return SDValue();
7262 N0 = N0.getOperand(0);
7263 LookPassAnd0 = true;
7264 }
7265
7266 if (N1.getOpcode() == ISD::AND) {
7267 if (!N1->hasOneUse())
7268 return SDValue();
7269 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7270 if (!N11C || N11C->getZExtValue() != 0xFF)
7271 return SDValue();
7272 N1 = N1.getOperand(0);
7273 LookPassAnd1 = true;
7274 }
7275
7276 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7277 std::swap(N0, N1);
7278 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7279 return SDValue();
7280 if (!N0->hasOneUse() || !N1->hasOneUse())
7281 return SDValue();
7282
7283 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7284 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7285 if (!N01C || !N11C)
7286 return SDValue();
7287 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7288 return SDValue();
7289
7290 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7291 SDValue N00 = N0->getOperand(0);
7292 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7293 if (!N00->hasOneUse())
7294 return SDValue();
7295 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7296 if (!N001C || N001C->getZExtValue() != 0xFF)
7297 return SDValue();
7298 N00 = N00.getOperand(0);
7299 LookPassAnd0 = true;
7300 }
7301
7302 SDValue N10 = N1->getOperand(0);
7303 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7304 if (!N10->hasOneUse())
7305 return SDValue();
7306 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7307 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7308 // for X86.
7309 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7310 N101C->getZExtValue() != 0xFFFF))
7311 return SDValue();
7312 N10 = N10.getOperand(0);
7313 LookPassAnd1 = true;
7314 }
7315
7316 if (N00 != N10)
7317 return SDValue();
7318
7319 // Make sure everything beyond the low halfword gets set to zero since the SRL
7320 // 16 will clear the top bits.
7321 unsigned OpSizeInBits = VT.getSizeInBits();
7322 if (OpSizeInBits > 16) {
7323 // If the left-shift isn't masked out then the only way this is a bswap is
7324 // if all bits beyond the low 8 are 0. In that case the entire pattern
7325 // reduces to a left shift anyway: leave it for other parts of the combiner.
7326 if (DemandHighBits && !LookPassAnd0)
7327 return SDValue();
7328
7329 // However, if the right shift isn't masked out then it might be because
7330 // it's not needed. See if we can spot that too. If the high bits aren't
7331 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7332 // upper bits to be zero.
7333 if (!LookPassAnd1) {
7334 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7335 if (!DAG.MaskedValueIsZero(N10,
7336 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7337 return SDValue();
7338 }
7339 }
7340
7341 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7342 if (OpSizeInBits > 16) {
7343 SDLoc DL(N);
7344 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7345 DAG.getConstant(OpSizeInBits - 16, DL,
7346 getShiftAmountTy(VT)));
7347 }
7348 return Res;
7349}
7350
7351/// Return true if the specified node is an element that makes up a 32-bit
7352/// packed halfword byteswap.
7353/// ((x & 0x000000ff) << 8) |
7354/// ((x & 0x0000ff00) >> 8) |
7355/// ((x & 0x00ff0000) << 8) |
7356 /// ((x & 0xff000000) >> 8)
7357 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7358 if (!N->hasOneUse())
7359 return false;
7360
7361 unsigned Opc = N.getOpcode();
7362 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7363 return false;
7364
7365 SDValue N0 = N.getOperand(0);
7366 unsigned Opc0 = N0.getOpcode();
7367 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7368 return false;
7369
7370 ConstantSDNode *N1C = nullptr;
7371 // SHL or SRL: look upstream for AND mask operand
7372 if (Opc == ISD::AND)
7373 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7374 else if (Opc0 == ISD::AND)
7375 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7376 if (!N1C)
7377 return false;
7378
7379 unsigned MaskByteOffset;
7380 switch (N1C->getZExtValue()) {
7381 default:
7382 return false;
7383 case 0xFF: MaskByteOffset = 0; break;
7384 case 0xFF00: MaskByteOffset = 1; break;
7385 case 0xFFFF:
7386 // In case demanded bits didn't clear the bits that will be shifted out.
7387 // This is needed for X86.
7388 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7389 MaskByteOffset = 1;
7390 break;
7391 }
7392 return false;
7393 case 0xFF0000: MaskByteOffset = 2; break;
7394 case 0xFF000000: MaskByteOffset = 3; break;
7395 }
7396
7397 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7398 if (Opc == ISD::AND) {
7399 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7400 // (x >> 8) & 0xff
7401 // (x >> 8) & 0xff0000
7402 if (Opc0 != ISD::SRL)
7403 return false;
7404 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7405 if (!C || C->getZExtValue() != 8)
7406 return false;
7407 } else {
7408 // (x << 8) & 0xff00
7409 // (x << 8) & 0xff000000
7410 if (Opc0 != ISD::SHL)
7411 return false;
7412 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7413 if (!C || C->getZExtValue() != 8)
7414 return false;
7415 }
7416 } else if (Opc == ISD::SHL) {
7417 // (x & 0xff) << 8
7418 // (x & 0xff0000) << 8
7419 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7420 return false;
7421 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7422 if (!C || C->getZExtValue() != 8)
7423 return false;
7424 } else { // Opc == ISD::SRL
7425 // (x & 0xff00) >> 8
7426 // (x & 0xff000000) >> 8
7427 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7428 return false;
7429 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7430 if (!C || C->getZExtValue() != 8)
7431 return false;
7432 }
7433
7434 if (Parts[MaskByteOffset])
7435 return false;
7436
7437 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7438 return true;
7439}
7440
7441 // Match 2 elements of a packed halfword bswap.
7442 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7443 if (N.getOpcode() == ISD::OR)
7444 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7445 isBSwapHWordElement(N.getOperand(1), Parts);
7446
7447 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7448 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7449 if (!C || C->getAPIntValue() != 16)
7450 return false;
7451 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7452 return true;
7453 }
7454
7455 return false;
7456}
7457
7458// Match this pattern:
7459// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7460// And rewrite this to:
7461// (rotr (bswap A), 16)
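// For example, with i32 A = 0xAABBCCDD:
//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff) == 0xBBAADDCC
//   == rotr(bswap A, 16).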
7462 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7463 SelectionDAG &DAG, SDNode *N, SDValue N0,
7464 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7465 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7466 "MatchBSwapHWordOrAndAnd: expecting i32");
7467 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7468 return SDValue();
7469 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7470 return SDValue();
7471 // TODO: this is too restrictive; lifting this restriction requires more tests
7472 if (!N0->hasOneUse() || !N1->hasOneUse())
7473 return SDValue();
7474 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7475 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7476 if (!Mask0 || !Mask1)
7477 return SDValue();
7478 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7479 Mask1->getAPIntValue() != 0x00ff00ff)
7480 return SDValue();
7481 SDValue Shift0 = N0.getOperand(0);
7482 SDValue Shift1 = N1.getOperand(0);
7483 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7484 return SDValue();
7485 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7486 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7487 if (!ShiftAmt0 || !ShiftAmt1)
7488 return SDValue();
7489 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7490 return SDValue();
7491 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7492 return SDValue();
7493
7494 SDLoc DL(N);
7495 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7496 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7497 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7498}
7499
7500/// Match a 32-bit packed halfword bswap. That is
7501/// ((x & 0x000000ff) << 8) |
7502/// ((x & 0x0000ff00) >> 8) |
7503/// ((x & 0x00ff0000) << 8) |
7504/// ((x & 0xff000000) >> 8)
7505/// => (rotl (bswap x), 16)
7506SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7507 if (!LegalOperations)
7508 return SDValue();
7509
7510 EVT VT = N->getValueType(0);
7511 if (VT != MVT::i32)
7512 return SDValue();
7513 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7514 return SDValue();
7515
7516 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7517 getShiftAmountTy(VT)))
7518 return BSwap;
7519
7520 // Try again with commuted operands.
7521 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7522 getShiftAmountTy(VT)))
7523 return BSwap;
7524
7525
7526 // Look for either
7527 // (or (bswaphpair), (bswaphpair))
7528 // (or (or (bswaphpair), (and)), (and))
7529 // (or (or (and), (bswaphpair)), (and))
7530 SDNode *Parts[4] = {};
7531
7532 if (isBSwapHWordPair(N0, Parts)) {
7533 // (or (or (and), (and)), (or (and), (and)))
7534 if (!isBSwapHWordPair(N1, Parts))
7535 return SDValue();
7536 } else if (N0.getOpcode() == ISD::OR) {
7537 // (or (or (or (and), (and)), (and)), (and))
7538 if (!isBSwapHWordElement(N1, Parts))
7539 return SDValue();
7540 SDValue N00 = N0.getOperand(0);
7541 SDValue N01 = N0.getOperand(1);
7542 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7543 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7544 return SDValue();
7545 } else {
7546 return SDValue();
7547 }
7548
7549 // Make sure the parts are all coming from the same node.
7550 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7551 return SDValue();
7552
7553 SDLoc DL(N);
7554 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7555 SDValue(Parts[0], 0));
7556
7557 // Result of the bswap should be rotated by 16. If it's not legal, then
7558 // do (x << 16) | (x >> 16).
7559 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7561 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7563 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7564 return DAG.getNode(ISD::OR, DL, VT,
7565 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7566 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7567}
7568
7569/// This contains all DAGCombine rules which reduce two values combined by
7570/// an Or operation to a single value \see visitANDLike().
7571SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7572 EVT VT = N1.getValueType();
7573
7574 // fold (or x, undef) -> -1
7575 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7576 return DAG.getAllOnesConstant(DL, VT);
7577
7578 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7579 return V;
7580
7581 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
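// e.g. (or (and X, 0xFF00), (and Y, 0x00FF)) -> (and (or X, Y), 0xFFFF)
// when X's low byte and Y's second byte are already known to be zero.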
7582 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7583 // Don't increase # computations.
7584 (N0->hasOneUse() || N1->hasOneUse())) {
7585 // We can only do this xform if we know that bits from X that are set in C2
7586 // but not in C1 are already zero. Likewise for Y.
7587 if (const ConstantSDNode *N0O1C =
7589 if (const ConstantSDNode *N1O1C =
7591 // We can only do this xform if we know that bits from X that are set in
7592 // C2 but not in C1 are already zero. Likewise for Y.
7593 const APInt &LHSMask = N0O1C->getAPIntValue();
7594 const APInt &RHSMask = N1O1C->getAPIntValue();
7595
7596 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7597 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7598 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7599 N0.getOperand(0), N1.getOperand(0));
7600 return DAG.getNode(ISD::AND, DL, VT, X,
7601 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7602 }
7603 }
7604 }
7605 }
7606
7607 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7608 if (N0.getOpcode() == ISD::AND &&
7609 N1.getOpcode() == ISD::AND &&
7610 N0.getOperand(0) == N1.getOperand(0) &&
7611 // Don't increase # computations.
7612 (N0->hasOneUse() || N1->hasOneUse())) {
7613 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7614 N0.getOperand(1), N1.getOperand(1));
7615 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7616 }
7617
7618 return SDValue();
7619}
7620
7621/// OR combines for which the commuted variant will be tried as well.
7622 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7623 SDNode *N) {
7624 EVT VT = N0.getValueType();
7625 unsigned BW = VT.getScalarSizeInBits();
7626 SDLoc DL(N);
7627
7628 auto peekThroughResize = [](SDValue V) {
7629 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7630 return V->getOperand(0);
7631 return V;
7632 };
7633
7634 SDValue N0Resized = peekThroughResize(N0);
7635 if (N0Resized.getOpcode() == ISD::AND) {
7636 SDValue N1Resized = peekThroughResize(N1);
7637 SDValue N00 = N0Resized.getOperand(0);
7638 SDValue N01 = N0Resized.getOperand(1);
7639
7640 // fold or (and x, y), x --> x
7641 if (N00 == N1Resized || N01 == N1Resized)
7642 return N1;
7643
7644 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7645 // TODO: Set AllowUndefs = true.
7646 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7647 /* AllowUndefs */ false)) {
7648 if (peekThroughResize(NotOperand) == N1Resized)
7649 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7650 N1);
7651 }
7652
7653 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7654 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7655 /* AllowUndefs */ false)) {
7656 if (peekThroughResize(NotOperand) == N1Resized)
7657 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7658 N1);
7659 }
7660 }
7661
7662 SDValue X, Y;
7663
7664 // fold or (xor X, N1), N1 --> or X, N1
7665 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7666 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7667
7668 // fold or (xor x, y), (x and/or y) --> or x, y
7669 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7670 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7671 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7672 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7673
7674 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7675 return R;
7676
7677 auto peekThroughZext = [](SDValue V) {
7678 if (V->getOpcode() == ISD::ZERO_EXTEND)
7679 return V->getOperand(0);
7680 return V;
7681 };
7682
7683 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7684 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7685 N0.getOperand(0) == N1.getOperand(0) &&
7686 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7687 return N0;
7688
7689 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7690 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7691 N0.getOperand(1) == N1.getOperand(0) &&
7692 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7693 return N0;
7694
7695 // Attempt to match a legalized build_pair-esque pattern:
7696 // or(shl(aext(Hi),BW/2),zext(Lo))
7697 SDValue Lo, Hi;
7698 if (sd_match(N0,
7699 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7700 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7701 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7702 Lo.getValueType() == Hi.getValueType()) {
7703 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7704 SDValue NotLo, NotHi;
7705 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7706 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7707 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7708 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7709 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7710 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7711 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7712 }
7713 }
7714
7715 return SDValue();
7716}
7717
7718SDValue DAGCombiner::visitOR(SDNode *N) {
7719 SDValue N0 = N->getOperand(0);
7720 SDValue N1 = N->getOperand(1);
7721 EVT VT = N1.getValueType();
7722 SDLoc DL(N);
7723
7724 // x | x --> x
7725 if (N0 == N1)
7726 return N0;
7727
7728 // fold (or c1, c2) -> c1|c2
7729 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7730 return C;
7731
7732 // canonicalize constant to RHS
7733 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7734 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7735 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7736
7737 // fold vector ops
7738 if (VT.isVector()) {
7739 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7740 return FoldedVOp;
7741
7742 // fold (or x, 0) -> x, vector edition
7743 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7744 return N0;
7745
7746 // fold (or x, -1) -> -1, vector edition
7747 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7748 // do not return N1, because undef node may exist in N1
7749 return DAG.getAllOnesConstant(DL, N1.getValueType());
7750
7751 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7752 // Do this only if the resulting type / shuffle is legal.
7753 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7754 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7755 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7756 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7757 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7758 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7759 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7760 // Ensure both shuffles have a zero input.
7761 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7762 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7763 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7764 bool CanFold = true;
7765 int NumElts = VT.getVectorNumElements();
7766 SmallVector<int, 4> Mask(NumElts, -1);
7767
7768 for (int i = 0; i != NumElts; ++i) {
7769 int M0 = SV0->getMaskElt(i);
7770 int M1 = SV1->getMaskElt(i);
7771
7772 // Determine if either index is pointing to a zero vector.
7773 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7774 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7775
7776 // If one element is zero and the other side is undef, keep undef.
7777 // This also handles the case that both are undef.
7778 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7779 continue;
7780
7781 // Make sure only one of the elements is zero.
7782 if (M0Zero == M1Zero) {
7783 CanFold = false;
7784 break;
7785 }
7786
7787 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7788
7789 // We have a zero and non-zero element. If the non-zero came from
7790 // SV0 make the index a LHS index. If it came from SV1, make it
7791 // a RHS index. We need to mod by NumElts because we don't care
7792 // which operand it came from in the original shuffles.
7793 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7794 }
7795
7796 if (CanFold) {
7797 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7798 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7799 SDValue LegalShuffle =
7800 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7801 if (LegalShuffle)
7802 return LegalShuffle;
7803 }
7804 }
7805 }
7806 }
7807
7808 // fold (or x, 0) -> x
7809 if (isNullConstant(N1))
7810 return N0;
7811
7812 // fold (or x, -1) -> -1
7813 if (isAllOnesConstant(N1))
7814 return N1;
7815
7816 if (SDValue NewSel = foldBinOpIntoSelect(N))
7817 return NewSel;
7818
7819 // fold (or x, c) -> c iff (x & ~c) == 0
7820 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7821 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7822 return N1;
7823
7824 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7825 return R;
7826
7827 if (SDValue Combined = visitORLike(N0, N1, DL))
7828 return Combined;
7829
7830 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7831 return Combined;
7832
7833 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7834 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7835 return BSwap;
7836 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7837 return BSwap;
7838
7839 // reassociate or
7840 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7841 return ROR;
7842
7843 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7844 if (SDValue SD =
7845 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7846 return SD;
7847
7848 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7849 // iff (c1 & c2) != 0 or c1/c2 are undef.
7850 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7851 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7852 };
7853 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7854 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7855 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7856 {N1, N0.getOperand(1)})) {
7857 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7858 AddToWorklist(IOR.getNode());
7859 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
7860 }
7861 }
7862
7863 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7864 return Combined;
7865 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7866 return Combined;
7867
7868 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7869 if (N0.getOpcode() == N1.getOpcode())
7870 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7871 return V;
7872
7873 // See if this is some rotate idiom.
7874 if (SDValue Rot = MatchRotate(N0, N1, DL))
7875 return Rot;
7876
7877 if (SDValue Load = MatchLoadCombine(N))
7878 return Load;
7879
7880 // Simplify the operands using demanded-bits information.
7881 if (SimplifyDemandedBits(SDValue(N, 0)))
7882 return SDValue(N, 0);
7883
7884 // If OR can be rewritten into ADD, try combines based on ADD.
7885 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7886 DAG.isADDLike(SDValue(N, 0)))
7887 if (SDValue Combined = visitADDLike(N))
7888 return Combined;
7889
7890 // Postpone until legalization completed to avoid interference with bswap
7891 // folding
7892 if (LegalOperations || VT.isVector())
7893 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7894 return R;
7895
7896 return SDValue();
7897}
7898
7899 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7900 SDValue &Mask) {
7901 if (Op.getOpcode() == ISD::AND &&
7902 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7903 Mask = Op.getOperand(1);
7904 return Op.getOperand(0);
7905 }
7906 return Op;
7907}
7908
7909/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7910static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7911 SDValue &Mask) {
7912 Op = stripConstantMask(DAG, Op, Mask);
7913 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7914 Shift = Op;
7915 return true;
7916 }
7917 return false;
7918}
7919
7920/// Helper function for visitOR to extract the needed side of a rotate idiom
7921/// from a shl/srl/mul/udiv. This is meant to handle cases where
7922/// InstCombine merged some outside op with one of the shifts from
7923/// the rotate pattern.
7924/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7925/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7926/// patterns:
7927///
7928/// (or (add v v) (shrl v bitwidth-1)):
7929/// expands (add v v) -> (shl v 1)
7930///
7931/// (or (mul v c0) (shrl (mul v c1) c2)):
7932/// expands (mul v c0) -> (shl (mul v c1) c3)
7933///
7934/// (or (udiv v c0) (shl (udiv v c1) c2)):
7935/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7936///
7937/// (or (shl v c0) (shrl (shl v c1) c2)):
7938/// expands (shl v c0) -> (shl (shl v c1) c3)
7939///
7940/// (or (shrl v c0) (shl (shrl v c1) c2)):
7941/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7942///
7943/// Such that in all cases, c3+c2==bitwidth(op v c1).
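/// Illustrative i32 example (not from the source):
/// (or (mul v 8) (shrl (mul v 2) 30)) can treat (mul v 8) as
/// (shl (mul v 2) 2), since 8 == 2 * (1 << 2) and 2 + 30 == 32, exposing a
/// rotate of (mul v 2).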
7944 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7945 SDValue ExtractFrom, SDValue &Mask,
7946 const SDLoc &DL) {
7947 assert(OppShift && ExtractFrom && "Empty SDValue");
7948 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
7949 return SDValue();
7950
7951 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7952
7953 // Value and Type of the shift.
7954 SDValue OppShiftLHS = OppShift.getOperand(0);
7955 EVT ShiftedVT = OppShiftLHS.getValueType();
7956
7957 // Amount of the existing shift.
7958 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7959
7960 // (add v v) -> (shl v 1)
7961 // TODO: Should this be a general DAG canonicalization?
7962 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7963 ExtractFrom.getOpcode() == ISD::ADD &&
7964 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7965 ExtractFrom.getOperand(0) == OppShiftLHS &&
7966 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7967 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7968 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7969
7970 // Preconditions:
7971 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7972 //
7973 // Find opcode of the needed shift to be extracted from (op0 v c0).
7974 unsigned Opcode = ISD::DELETED_NODE;
7975 bool IsMulOrDiv = false;
7976 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7977 // opcode or its arithmetic (mul or udiv) variant.
7978 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7979 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7980 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7981 return false;
7982 Opcode = NeededShift;
7983 return true;
7984 };
7985 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7986 // that the needed shift can be extracted from.
7987 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7988 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7989 return SDValue();
7990
7991 // op0 must be the same opcode on both sides, have the same LHS argument,
7992 // and produce the same value type.
7993 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7994 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7995 ShiftedVT != ExtractFrom.getValueType())
7996 return SDValue();
7997
7998 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7999 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8000 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8001 ConstantSDNode *ExtractFromCst =
8002 isConstOrConstSplat(ExtractFrom.getOperand(1));
8003 // TODO: We should be able to handle non-uniform constant vectors for these values
8004 // Check that we have constant values.
8005 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8006 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8007 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8008 return SDValue();
8009
8010 // Compute the shift amount we need to extract to complete the rotate.
8011 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8012 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8013 return SDValue();
8014 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8015 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8016 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8017 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8018 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8019
8020 // Now try extract the needed shift from the ExtractFrom op and see if the
8021 // result matches up with the existing shift's LHS op.
8022 if (IsMulOrDiv) {
8023 // Op to extract from is a mul or udiv by a constant.
8024 // Check:
8025 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8026 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8027 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8028 NeededShiftAmt.getZExtValue());
8029 APInt ResultAmt;
8030 APInt Rem;
8031 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8032 if (Rem != 0 || ResultAmt != OppLHSAmt)
8033 return SDValue();
8034 } else {
8035 // Op to extract from is a shift by a constant.
8036 // Check:
8037 // c2 - (bitwidth(op0 v c0) - c1) == c0
8038 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8039 ExtractFromAmt.getBitWidth()))
8040 return SDValue();
8041 }
8042
8043 // Return the expanded shift op that should allow a rotate to be formed.
8044 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8045 EVT ResVT = ExtractFrom.getValueType();
8046 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8047 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8048}
8049
8050// Return true if we can prove that, whenever Neg and Pos are both in the
8051// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8052// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8053//
8054// (or (shift1 X, Neg), (shift2 X, Pos))
8055//
8056// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8057// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8058// to consider shift amounts with defined behavior.
8059//
8060// The IsRotate flag should be set when the LHS of both shifts is the same.
8061// Otherwise if matching a general funnel shift, it should be clear.
8062static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8063 SelectionDAG &DAG, bool IsRotate) {
8064 const auto &TLI = DAG.getTargetLoweringInfo();
8065 // If EltSize is a power of 2 then:
8066 //
8067 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8068 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8069 //
8070 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8071 // for the stronger condition:
8072 //
8073 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8074 //
8075 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8076 // we can just replace Neg with Neg' for the rest of the function.
8077 //
8078 // In other cases we check for the even stronger condition:
8079 //
8080 // Neg == EltSize - Pos [B]
8081 //
8082 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8083 // behavior if Pos == 0 (and consequently Neg == EltSize).
8084 //
8085 // We could actually use [A] whenever EltSize is a power of 2, but the
8086 // only extra cases that it would match are those uninteresting ones
8087 // where Neg and Pos are never in range at the same time. E.g. for
8088 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8089 // as well as (sub 32, Pos), but:
8090 //
8091 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8092 //
8093 // always invokes undefined behavior for 32-bit X.
8094 //
8095 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8096 // This allows us to peek through any operations that only affect Mask's
8097 // un-demanded bits.
8098 //
8099 // NOTE: We can only do this when matching operations which won't modify the
8100 // least Log2(EltSize) significant bits and not a general funnel shift.
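// Illustrative example (not from the source): for EltSize == 32, the common
// idiom Neg == (and (sub 32, Pos), 31) matches via [A]; the mask is peeled
// off and Neg' == (sub 32, Pos) trivially satisfies (Neg' & 31) == ((32 - Pos) & 31).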
8101 unsigned MaskLoBits = 0;
8102 if (IsRotate && isPowerOf2_64(EltSize)) {
8103 unsigned Bits = Log2_64(EltSize);
8104 unsigned NegBits = Neg.getScalarValueSizeInBits();
8105 if (NegBits >= Bits) {
8106 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8107 if (SDValue Inner =
8108 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8109 Neg = Inner;
8110 MaskLoBits = Bits;
8111 }
8112 }
8113 }
8114
8115 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8116 if (Neg.getOpcode() != ISD::SUB)
8117 return false;
8118 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8119 if (!NegC)
8120 return false;
8121 SDValue NegOp1 = Neg.getOperand(1);
8122
8123 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8124 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8125 // are redundant for the purpose of the equality.
8126 if (MaskLoBits) {
8127 unsigned PosBits = Pos.getScalarValueSizeInBits();
8128 if (PosBits >= MaskLoBits) {
8129 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8130 if (SDValue Inner =
8131 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG))
8132 Pos = Inner;
8133 }
8134 }
8135 }
8136
8137 // The condition we need is now:
8138 //
8139 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8140 //
8141 // If NegOp1 == Pos then we need:
8142 //
8143 // EltSize & Mask == NegC & Mask
8144 //
8145 // (because "x & Mask" is a truncation and distributes through subtraction).
8146 //
8147 // We also need to account for a potential truncation of NegOp1 if the amount
8148 // has already been legalized to a shift amount type.
8149 APInt Width;
8150 if ((Pos == NegOp1) ||
8151 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8152 Width = NegC->getAPIntValue();
8153
8154 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8155 // Then the condition we want to prove becomes:
8156 //
8157 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8158 //
8159 // which, again because "x & Mask" is a truncation, becomes:
8160 //
8161 // NegC & Mask == (EltSize - PosC) & Mask
8162 // EltSize & Mask == (NegC + PosC) & Mask
8163 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8164 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8165 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8166 else
8167 return false;
8168 } else
8169 return false;
8170
8171 // Now we just need to check that EltSize & Mask == Width & Mask.
8172 if (MaskLoBits)
8173 // EltSize & Mask is 0 since Mask is EltSize - 1.
8174 return Width.getLoBits(MaskLoBits) == 0;
8175 return Width == EltSize;
8176}
8177
8178// A subroutine of MatchRotate used once we have found an OR of two opposite
8179// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8180// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8181// former being preferred if supported. InnerPos and InnerNeg are Pos and
8182// Neg with outer conversions stripped away.
8183SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8184 SDValue Neg, SDValue InnerPos,
8185 SDValue InnerNeg, bool HasPos,
8186 unsigned PosOpcode, unsigned NegOpcode,
8187 const SDLoc &DL) {
8188 // fold (or (shl x, (*ext y)),
8189 // (srl x, (*ext (sub 32, y)))) ->
8190 // (rotl x, y) or (rotr x, (sub 32, y))
8191 //
8192 // fold (or (shl x, (*ext (sub 32, y))),
8193 // (srl x, (*ext y))) ->
8194 // (rotr x, y) or (rotl x, (sub 32, y))
8195 EVT VT = Shifted.getValueType();
8196 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8197 /*IsRotate*/ true)) {
8198 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8199 HasPos ? Pos : Neg);
8200 }
8201
8202 return SDValue();
8203}
8204
8205// A subroutine of MatchRotate used once we have found an OR of two opposite
8206// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8207// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8208// former being preferred if supported. InnerPos and InnerNeg are Pos and
8209// Neg with outer conversions stripped away.
8210// TODO: Merge with MatchRotatePosNeg.
8211SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8212 SDValue Neg, SDValue InnerPos,
8213 SDValue InnerNeg, bool HasPos,
8214 unsigned PosOpcode, unsigned NegOpcode,
8215 const SDLoc &DL) {
8216 EVT VT = N0.getValueType();
8217 unsigned EltBits = VT.getScalarSizeInBits();
8218
8219 // fold (or (shl x0, (*ext y)),
8220 // (srl x1, (*ext (sub 32, y)))) ->
8221 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8222 //
8223 // fold (or (shl x0, (*ext (sub 32, y))),
8224 // (srl x1, (*ext y))) ->
8225 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8226 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8227 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8228 HasPos ? Pos : Neg);
8229 }
8230
8231 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8232 // so for now just use the PosOpcode case if its legal.
8233 // TODO: When can we use the NegOpcode case?
8234 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8235 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8236 if (Op.getOpcode() != BinOpc)
8237 return false;
8238 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8239 return Cst && (Cst->getAPIntValue() == Imm);
8240 };
8241
8242 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8243 // -> (fshl x0, x1, y)
8244 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8245 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8246 InnerPos == InnerNeg.getOperand(0) &&
8247 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8248 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8249 }
8250
8251 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8252 // -> (fshr x0, x1, y)
8253 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8254 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8255 InnerNeg == InnerPos.getOperand(0) &&
8256 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8257 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8258 }
8259
8260 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8261 // -> (fshr x0, x1, y)
8262 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8263 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8264 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8265 InnerNeg == InnerPos.getOperand(0) &&
8266 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8267 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8268 }
8269 }
8270
8271 return SDValue();
8272}
8273
8274// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8275// idioms for rotate, and if the target supports rotation instructions, generate
8276// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8277// with different shifted sources.
8278SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8279 EVT VT = LHS.getValueType();
8280
8281 // The target must have at least one rotate/funnel flavor.
8282 // We still try to match rotate by constant pre-legalization.
8283 // TODO: Support pre-legalization funnel-shift by constant.
8284 bool HasROTL = hasOperation(ISD::ROTL, VT);
8285 bool HasROTR = hasOperation(ISD::ROTR, VT);
8286 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8287 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8288
8289 // If the type is going to be promoted and the target has enabled custom
8290 // lowering for rotate, allow matching rotate by non-constants. Only allow
8291 // this for scalar types.
8292 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8293 TargetLowering::TypePromoteInteger) {
8294 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8295 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8296 }
8297
8298 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8299 return SDValue();
8300
8301 // Check for truncated rotate.
8302 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8303 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8304 assert(LHS.getValueType() == RHS.getValueType());
8305 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8306 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8307 }
8308 }
8309
8310 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8311 SDValue LHSShift; // The shift.
8312 SDValue LHSMask; // AND value if any.
8313 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8314
8315 SDValue RHSShift; // The shift.
8316 SDValue RHSMask; // AND value if any.
8317 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8318
8319 // If neither side matched a rotate half, bail
8320 if (!LHSShift && !RHSShift)
8321 return SDValue();
8322
8323 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8324 // side of the rotate, so try to handle that here. In all cases we need to
8325 // pass the matched shift from the opposite side to compute the opcode and
8326 // needed shift amount to extract. We still want to do this if both sides
8327 // matched a rotate half because one half may be a potential overshift that
8328 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8329 // single one).
8330
8331 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8332 if (LHSShift)
8333 if (SDValue NewRHSShift =
8334 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8335 RHSShift = NewRHSShift;
8336 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8337 if (RHSShift)
8338 if (SDValue NewLHSShift =
8339 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8340 LHSShift = NewLHSShift;
8341
8342 // If a side is still missing, nothing else we can do.
8343 if (!RHSShift || !LHSShift)
8344 return SDValue();
8345
8346 // At this point we've matched or extracted a shift op on each side.
8347
8348 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8349 return SDValue(); // Shifts must disagree.
8350
8351 // Canonicalize shl to left side in a shl/srl pair.
8352 if (RHSShift.getOpcode() == ISD::SHL) {
8353 std::swap(LHS, RHS);
8354 std::swap(LHSShift, RHSShift);
8355 std::swap(LHSMask, RHSMask);
8356 }
8357
8358 // Something has gone wrong - we've lost the shl/srl pair - bail.
8359 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8360 return SDValue();
8361
8362 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8363 SDValue LHSShiftArg = LHSShift.getOperand(0);
8364 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8365 SDValue RHSShiftArg = RHSShift.getOperand(0);
8366 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8367
8368 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8369 ConstantSDNode *RHS) {
8370 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8371 };
8372
8373 auto ApplyMasks = [&](SDValue Res) {
8374 // If there is an AND of either shifted operand, apply it to the result.
8375 if (LHSMask.getNode() || RHSMask.getNode()) {
8376 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8377 SDValue Mask = AllOnes;
8378
8379 if (LHSMask.getNode()) {
8380 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8381 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8382 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8383 }
8384 if (RHSMask.getNode()) {
8385 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8386 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8387 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8388 }
8389
8390 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8391 }
8392
8393 return Res;
8394 };
8395
8396 // TODO: Support pre-legalization funnel-shift by constant.
8397 bool IsRotate = LHSShiftArg == RHSShiftArg;
8398 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8399 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8400 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8401 // Look for a disguised rotate by constant.
8402 // The common shifted operand X may be hidden inside another 'or'.
8403 SDValue X, Y;
8404 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8405 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8406 return false;
8407 if (CommonOp == Or.getOperand(0)) {
8408 X = CommonOp;
8409 Y = Or.getOperand(1);
8410 return true;
8411 }
8412 if (CommonOp == Or.getOperand(1)) {
8413 X = CommonOp;
8414 Y = Or.getOperand(0);
8415 return true;
8416 }
8417 return false;
8418 };
8419
8420 SDValue Res;
8421 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8422 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8423 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8424 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8425 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8426 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8427 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8428 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8429 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8430 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8431 } else {
8432 return SDValue();
8433 }
8434
8435 return ApplyMasks(Res);
8436 }
8437
8438 return SDValue(); // Requires funnel shift support.
8439 }
8440
8441 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8442 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8443 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8444 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8445 // iff C1+C2 == EltSizeInBits
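// Illustrative i32 examples (not from the source):
// (or (shl x, 8), (srl x, 24)) -> (rotl x, 8) or (rotr x, 24)
// (or (shl x, 8), (srl y, 24)) -> (fshl x, y, 8) or (fshr x, y, 24)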
8446 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8447 SDValue Res;
8448 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8449 bool UseROTL = !LegalOperations || HasROTL;
8450 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8451 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8452 } else {
8453 bool UseFSHL = !LegalOperations || HasFSHL;
8454 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8455 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8456 }
8457
8458 return ApplyMasks(Res);
8459 }
8460
8461 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8462 // shift.
8463 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8464 return SDValue();
8465
8466 // If there is a mask here, and we have a variable shift, we can't be sure
8467 // that we're masking out the right stuff.
8468 if (LHSMask.getNode() || RHSMask.getNode())
8469 return SDValue();
8470
8471 // If the shift amount is sign/zext/any-extended just peel it off.
8472 SDValue LExtOp0 = LHSShiftAmt;
8473 SDValue RExtOp0 = RHSShiftAmt;
8474 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8475 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8476 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8477 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8478 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8479 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8480 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8481 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8482 LExtOp0 = LHSShiftAmt.getOperand(0);
8483 RExtOp0 = RHSShiftAmt.getOperand(0);
8484 }
8485
8486 if (IsRotate && (HasROTL || HasROTR)) {
8487 SDValue TryL =
8488 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8489 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8490 if (TryL)
8491 return TryL;
8492
8493 SDValue TryR =
8494 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8495 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8496 if (TryR)
8497 return TryR;
8498 }
8499
8500 SDValue TryL =
8501 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8502 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8503 if (TryL)
8504 return TryL;
8505
8506 SDValue TryR =
8507 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8508 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8509 if (TryR)
8510 return TryR;
8511
8512 return SDValue();
8513}
8514
8515/// Recursively traverses the expression calculating the origin of the requested
8516/// byte of the given value. Returns std::nullopt if the provider can't be
8517/// calculated.
8518///
8519/// For all the values except the root of the expression, we verify that the
8520/// value has exactly one use and if not then return std::nullopt. This way if
8521/// the origin of the byte is returned it's guaranteed that the values which
8522/// contribute to the byte are not used outside of this expression.
8523
8524/// However, there is a special case when dealing with vector loads -- we allow
8525/// more than one use if the load is a vector type. Since the values that
8526/// contribute to the byte ultimately come from the ExtractVectorElements of the
8527/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8528/// because those operations are independent from the pattern to be combined.
8529/// For vector loads, we simply care that the ByteProviders are adjacent
8530/// positions of the same vector, and their index matches the byte that is being
8531/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8532/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8533/// byte position we are trying to provide for the LoadCombine. If these do
8534/// not match, then we can not combine the vector loads. \p Index uses the
8535/// byte position we are trying to provide for and is matched against the
8536/// shl and load size. The \p Index algorithm ensures the requested byte is
8537/// provided for by the pattern, and the pattern does not over provide bytes.
8538///
8539///
8540/// The supported LoadCombine pattern for vector loads is as follows
8541/// or
8542/// / \
8543/// or shl
8544/// / \ |
8545/// or shl zext
8546/// / \ | |
8547/// shl zext zext EVE*
8548/// | | | |
8549/// zext EVE* EVE* LOAD
8550/// | | |
8551/// EVE* LOAD LOAD
8552/// |
8553/// LOAD
8554///
8555/// *ExtractVectorElement
8556 using SDByteProvider = ByteProvider<SDNode *>;
8557
8558static std::optional<SDByteProvider>
8559 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8560 std::optional<uint64_t> VectorIndex,
8561 unsigned StartingIndex = 0) {
8562
8563 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8564 if (Depth == 10)
8565 return std::nullopt;
8566
8567 // Only allow multiple uses if the instruction is a vector load (in which
8568 // case we will use the load for every ExtractVectorElement)
8569 if (Depth && !Op.hasOneUse() &&
8570 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8571 return std::nullopt;
8572
8573 // Fail to combine if we have encountered anything but a LOAD after handling
8574 // an ExtractVectorElement.
8575 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8576 return std::nullopt;
8577
8578 unsigned BitWidth = Op.getValueSizeInBits();
8579 if (BitWidth % 8 != 0)
8580 return std::nullopt;
8581 unsigned ByteWidth = BitWidth / 8;
8582 assert(Index < ByteWidth && "invalid index requested");
8583 (void) ByteWidth;
8584
8585 switch (Op.getOpcode()) {
8586 case ISD::OR: {
8587 auto LHS =
8588 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8589 if (!LHS)
8590 return std::nullopt;
8591 auto RHS =
8592 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8593 if (!RHS)
8594 return std::nullopt;
8595
8596 if (LHS->isConstantZero())
8597 return RHS;
8598 if (RHS->isConstantZero())
8599 return LHS;
8600 return std::nullopt;
8601 }
8602 case ISD::SHL: {
8603 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8604 if (!ShiftOp)
8605 return std::nullopt;
8606
8607 uint64_t BitShift = ShiftOp->getZExtValue();
8608
8609 if (BitShift % 8 != 0)
8610 return std::nullopt;
8611 uint64_t ByteShift = BitShift / 8;
8612
8613 // If we are shifting by an amount greater than the index we are trying to
8614 // provide, then do not provide anything. Otherwise, subtract the index by
8615 // the amount we shifted by.
8616 return Index < ByteShift
8617 ? SDByteProvider::getConstantZero()
8618 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8619 Depth + 1, VectorIndex, Index);
8620 }
8621 case ISD::ANY_EXTEND:
8622 case ISD::SIGN_EXTEND:
8623 case ISD::ZERO_EXTEND: {
8624 SDValue NarrowOp = Op->getOperand(0);
8625 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8626 if (NarrowBitWidth % 8 != 0)
8627 return std::nullopt;
8628 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8629
8630 if (Index >= NarrowByteWidth)
8631 return Op.getOpcode() == ISD::ZERO_EXTEND
8632 ? std::optional<SDByteProvider>(
8633 SDByteProvider::getConstantZero())
8634 : std::nullopt;
8635 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8636 StartingIndex);
8637 }
8638 case ISD::BSWAP:
8639 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8640 Depth + 1, VectorIndex, StartingIndex);
8641 case ISD::EXTRACT_VECTOR_ELT: {
8642 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8643 if (!OffsetOp)
8644 return std::nullopt;
8645
8646 VectorIndex = OffsetOp->getZExtValue();
8647
8648 SDValue NarrowOp = Op->getOperand(0);
8649 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8650 if (NarrowBitWidth % 8 != 0)
8651 return std::nullopt;
8652 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8653 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8654 // type, leaving the high bits undefined.
8655 if (Index >= NarrowByteWidth)
8656 return std::nullopt;
8657
8658 // Check to see if the position of the element in the vector corresponds
8659 // with the byte we are trying to provide for. In the case of a vector of
8660 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8661 // the element will provide a range of bytes. For example, if we have a
8662 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8663 // 3).
8664 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8665 return std::nullopt;
8666 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8667 return std::nullopt;
8668
8669 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8670 VectorIndex, StartingIndex);
8671 }
8672 case ISD::LOAD: {
8673 auto L = cast<LoadSDNode>(Op.getNode());
8674 if (!L->isSimple() || L->isIndexed())
8675 return std::nullopt;
8676
8677 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8678 if (NarrowBitWidth % 8 != 0)
8679 return std::nullopt;
8680 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8681
8682 // If the width of the load does not reach the byte we are trying to provide
8683 // for and it is not a ZEXTLOAD, then the load does not provide the byte in
8684 // question.
8685 if (Index >= NarrowByteWidth)
8686 return L->getExtensionType() == ISD::ZEXTLOAD
8687 ? std::optional<SDByteProvider>(
8688 SDByteProvider::getConstantZero())
8689 : std::nullopt;
8690
8691 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8692 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8693 }
8694 }
8695
8696 return std::nullopt;
8697}
8698
8699static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8700 return i;
8701}
8702
8703static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8704 return BW - i - 1;
8705}
8706
8707// Check if the bytes offsets we are looking at match with either big or
8708// little endian value loaded. Return true for big endian, false for little
8709// endian, and std::nullopt if match failed.
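// For example, byte offsets {0, 1, 2, 3} relative to FirstOffset indicate a
// little-endian layout, while {3, 2, 1, 0} indicate big endian.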
8710static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8711 int64_t FirstOffset) {
8712 // The endian can be decided only when it is 2 bytes at least.
8713 unsigned Width = ByteOffsets.size();
8714 if (Width < 2)
8715 return std::nullopt;
8716
8717 bool BigEndian = true, LittleEndian = true;
8718 for (unsigned i = 0; i < Width; i++) {
8719 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8720 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8721 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8722 if (!BigEndian && !LittleEndian)
8723 return std::nullopt;
8724 }
8725
8726 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8727 "little endian");
8728 return BigEndian;
8729}
8730
8731 static SDValue stripTruncAndExt(SDValue Value) {
8732 switch (Value.getOpcode()) {
8733 case ISD::TRUNCATE:
8734 case ISD::ZERO_EXTEND:
8735 case ISD::SIGN_EXTEND:
8736 case ISD::ANY_EXTEND:
8737 return stripTruncAndExt(Value.getOperand(0));
8738 }
8739 return Value;
8740}
8741
8742/// Match a pattern where a wide type scalar value is stored by several narrow
8743 /// stores. Fold it into a single store or a BSWAP and a store if the target
8744/// supports it.
8745///
8746/// Assuming little endian target:
8747/// i8 *p = ...
8748/// i32 val = ...
8749/// p[0] = (val >> 0) & 0xFF;
8750/// p[1] = (val >> 8) & 0xFF;
8751/// p[2] = (val >> 16) & 0xFF;
8752/// p[3] = (val >> 24) & 0xFF;
8753/// =>
8754/// *((i32)p) = val;
8755///
8756/// i8 *p = ...
8757/// i32 val = ...
8758/// p[0] = (val >> 24) & 0xFF;
8759/// p[1] = (val >> 16) & 0xFF;
8760/// p[2] = (val >> 8) & 0xFF;
8761/// p[3] = (val >> 0) & 0xFF;
8762/// =>
8763/// *((i32)p) = BSWAP(val);
8764SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8765 // The matching looks for "store (trunc x)" patterns that appear early but are
8766 // likely to be replaced by truncating store nodes during combining.
8767 // TODO: If there is evidence that running this later would help, this
8768 // limitation could be removed. Legality checks may need to be added
8769 // for the created store and optional bswap/rotate.
8770 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8771 return SDValue();
8772
8773 // We only handle merging simple stores of 1-4 bytes.
8774 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8775 EVT MemVT = N->getMemoryVT();
8776 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8777 !N->isSimple() || N->isIndexed())
8778 return SDValue();
8779
8780 // Collect all of the stores in the chain, up to the maximum store width (i64).
8781 SDValue Chain = N->getChain();
8782 SmallVector<StoreSDNode *, 8> Stores = {N};
8783 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8784 unsigned MaxWideNumBits = 64;
8785 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8786 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8787 // All stores must be the same size to ensure that we are writing all of the
8788 // bytes in the wide value.
8789 // This store should have exactly one use as a chain operand for another
8790 // store in the merging set. If there are other chain uses, then the
8791 // transform may not be safe because order of loads/stores outside of this
8792 // set may not be preserved.
8793 // TODO: We could allow multiple sizes by tracking each stored byte.
8794 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8795 Store->isIndexed() || !Store->hasOneUse())
8796 return SDValue();
8797 Stores.push_back(Store);
8798 Chain = Store->getChain();
8799 if (MaxStores < Stores.size())
8800 return SDValue();
8801 }
8802 // There is no reason to continue if we do not have at least a pair of stores.
8803 if (Stores.size() < 2)
8804 return SDValue();
8805
8806 // Handle simple types only.
8807 LLVMContext &Context = *DAG.getContext();
8808 unsigned NumStores = Stores.size();
8809 unsigned WideNumBits = NumStores * NarrowNumBits;
8810 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8811 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8812 return SDValue();
8813
8814 // Check if all bytes of the source value that we are looking at are stored
8815 // to the same base address. Collect offsets from Base address into OffsetMap.
8816 SDValue SourceValue;
8817 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8818 int64_t FirstOffset = INT64_MAX;
8819 StoreSDNode *FirstStore = nullptr;
8820 std::optional<BaseIndexOffset> Base;
8821 for (auto *Store : Stores) {
8822 // All the stores store different parts of the CombinedValue. A truncate is
8823 // required to get the partial value.
8824 SDValue Trunc = Store->getValue();
8825 if (Trunc.getOpcode() != ISD::TRUNCATE)
8826 return SDValue();
8827 // Other than the first/last part, a shift operation is required to get the
8828 // offset.
8829 int64_t Offset = 0;
8830 SDValue WideVal = Trunc.getOperand(0);
8831 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8832 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8833 // The shift amount must be a constant multiple of the narrow type.
8834 // It is translated to the offset address in the wide source value "y".
8835 //
8836 // x = srl y, ShiftAmtC
8837 // i8 z = trunc x
8838 // store z, ...
8839 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8840 if (ShiftAmtC % NarrowNumBits != 0)
8841 return SDValue();
8842
8843 Offset = ShiftAmtC / NarrowNumBits;
8844 WideVal = WideVal.getOperand(0);
8845 }
8846
8847 // Stores must share the same source value with different offsets.
8848 // Truncate and extends should be stripped to get the single source value.
8849 if (!SourceValue)
8850 SourceValue = WideVal;
8851 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8852 return SDValue();
8853 else if (SourceValue.getValueType() != WideVT) {
8854 if (WideVal.getValueType() == WideVT ||
8855 WideVal.getScalarValueSizeInBits() >
8856 SourceValue.getScalarValueSizeInBits())
8857 SourceValue = WideVal;
8858 // Give up if the source value type is smaller than the store size.
8859 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8860 return SDValue();
8861 }
8862
8863 // Stores must share the same base address.
8864 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8865 int64_t ByteOffsetFromBase = 0;
8866 if (!Base)
8867 Base = Ptr;
8868 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8869 return SDValue();
8870
8871 // Remember the first store.
8872 if (ByteOffsetFromBase < FirstOffset) {
8873 FirstStore = Store;
8874 FirstOffset = ByteOffsetFromBase;
8875 }
8876 // Map the offset in the store and the offset in the combined value, and
8877 // early return if it has been set before.
8878 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8879 return SDValue();
8880 OffsetMap[Offset] = ByteOffsetFromBase;
8881 }
8882
8883 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8884 assert(FirstStore && "First store must be set");
8885
8886 // Check that a store of the wide type is both allowed and fast on the target
8887 const DataLayout &Layout = DAG.getDataLayout();
8888 unsigned Fast = 0;
8889 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8890 *FirstStore->getMemOperand(), &Fast);
8891 if (!Allowed || !Fast)
8892 return SDValue();
8893
8894 // Check if the pieces of the value are going to the expected places in memory
8895 // to merge the stores.
8896 auto checkOffsets = [&](bool MatchLittleEndian) {
8897 if (MatchLittleEndian) {
8898 for (unsigned i = 0; i != NumStores; ++i)
8899 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8900 return false;
8901 } else { // MatchBigEndian by reversing loop counter.
8902 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8903 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8904 return false;
8905 }
8906 return true;
8907 };
8908
8909 // Check if the offsets line up for the native data layout of this target.
8910 bool NeedBswap = false;
8911 bool NeedRotate = false;
8912 if (!checkOffsets(Layout.isLittleEndian())) {
8913 // Special-case: check if byte offsets line up for the opposite endian.
8914 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8915 NeedBswap = true;
8916 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8917 NeedRotate = true;
8918 else
8919 return SDValue();
8920 }
8921
8922 SDLoc DL(N);
8923 if (WideVT != SourceValue.getValueType()) {
8924 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8925 "Unexpected store value to merge");
8926 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8927 }
8928
8929 // Before legalize we can introduce illegal bswaps/rotates which will be later
8930 // converted to an explicit bswap sequence. This way we end up with a single
8931 // store and byte shuffling instead of several stores and byte shuffling.
8932 if (NeedBswap) {
8933 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8934 } else if (NeedRotate) {
8935 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8936 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8937 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8938 }
8939
8940 SDValue NewStore =
8941 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8942 FirstStore->getPointerInfo(), FirstStore->getAlign());
8943
8944 // Rely on other DAG combine rules to remove the other individual stores.
8945 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8946 return NewStore;
8947}
8948
8949/// Match a pattern where a wide type scalar value is loaded by several narrow
8950/// loads and combined by shifts and ors. Fold it into a single load or a load
8951 /// and a BSWAP if the target supports it.
8952///
8953/// Assuming little endian target:
8954/// i8 *a = ...
8955/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8956/// =>
8957/// i32 val = *((i32)a)
8958///
8959/// i8 *a = ...
8960/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8961/// =>
8962/// i32 val = BSWAP(*((i32)a))
8963///
8964/// TODO: This rule matches complex patterns with OR node roots and doesn't
8965/// interact well with the worklist mechanism. When a part of the pattern is
8966/// updated (e.g. one of the loads) its direct users are put into the worklist,
8967/// but the root node of the pattern which triggers the load combine is not
8968/// necessarily a direct user of the changed node. For example, once the address
8969/// of t28 load is reassociated load combine won't be triggered:
8970/// t25: i32 = add t4, Constant:i32<2>
8971/// t26: i64 = sign_extend t25
8972/// t27: i64 = add t2, t26
8973/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8974/// t29: i32 = zero_extend t28
8975/// t32: i32 = shl t29, Constant:i8<8>
8976/// t33: i32 = or t23, t32
8977/// As a possible fix visitLoad can check if the load can be a part of a load
8978/// combine pattern and add corresponding OR roots to the worklist.
8979SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8980 assert(N->getOpcode() == ISD::OR &&
8981 "Can only match load combining against OR nodes");
8982
8983 // Handles simple types only
8984 EVT VT = N->getValueType(0);
8985 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8986 return SDValue();
8987 unsigned ByteWidth = VT.getSizeInBits() / 8;
8988
8989 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8990 auto MemoryByteOffset = [&](SDByteProvider P) {
8991 assert(P.hasSrc() && "Must be a memory byte provider");
8992 auto *Load = cast<LoadSDNode>(P.Src.value());
8993
8994 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
8995
8996 assert(LoadBitWidth % 8 == 0 &&
8997 "can only analyze providers for individual bytes not bit");
8998 unsigned LoadByteWidth = LoadBitWidth / 8;
8999 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9000 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9001 };
9002
9003 std::optional<BaseIndexOffset> Base;
9004 SDValue Chain;
9005
9006 SmallPtrSet<LoadSDNode *, 8> Loads;
9007 std::optional<SDByteProvider> FirstByteProvider;
9008 int64_t FirstOffset = INT64_MAX;
9009
9010 // Check if all the bytes of the OR we are looking at are loaded from the same
9011 // base address. Collect bytes offsets from Base address in ByteOffsets.
9012 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9013 unsigned ZeroExtendedBytes = 0;
9014 for (int i = ByteWidth - 1; i >= 0; --i) {
9015 auto P =
9016 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9017 /*StartingIndex*/ i);
9018 if (!P)
9019 return SDValue();
9020
9021 if (P->isConstantZero()) {
9022 // It's OK for the N most significant bytes to be 0, we can just
9023 // zero-extend the load.
9024 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9025 return SDValue();
9026 continue;
9027 }
9028 assert(P->hasSrc() && "provenance should either be memory or zero");
9029 auto *L = cast<LoadSDNode>(P->Src.value());
9030
9031 // All loads must share the same chain
9032 SDValue LChain = L->getChain();
9033 if (!Chain)
9034 Chain = LChain;
9035 else if (Chain != LChain)
9036 return SDValue();
9037
9038 // Loads must share the same base address
9039 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9040 int64_t ByteOffsetFromBase = 0;
9041
9042 // For vector loads, the expected load combine pattern will have an
9043 // ExtractElement for each index in the vector. While each of these
9044 // ExtractElements will be accessing the same base address as determined
9045 // by the load instruction, the actual bytes they interact with will differ
9046 // due to different ExtractElement indices. To accurately determine the
9047 // byte position of an ExtractElement, we offset the base load ptr with
9048 // the index multiplied by the byte size of each element in the vector.
9049 if (L->getMemoryVT().isVector()) {
9050 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9051 if (LoadWidthInBit % 8 != 0)
9052 return SDValue();
9053 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9054 Ptr.addToOffset(ByteOffsetFromVector);
9055 }
9056
9057 if (!Base)
9058 Base = Ptr;
9059
9060 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9061 return SDValue();
9062
9063 // Calculate the offset of the current byte from the base address
9064 ByteOffsetFromBase += MemoryByteOffset(*P);
9065 ByteOffsets[i] = ByteOffsetFromBase;
9066
9067 // Remember the first byte load
9068 if (ByteOffsetFromBase < FirstOffset) {
9069 FirstByteProvider = P;
9070 FirstOffset = ByteOffsetFromBase;
9071 }
9072
9073 Loads.insert(L);
9074 }
9075
9076 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9077 "memory, so there must be at least one load which produces the value");
9078 assert(Base && "Base address of the accessed memory location must be set");
9079 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9080
9081 bool NeedsZext = ZeroExtendedBytes > 0;
9082
9083 EVT MemVT =
9084 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9085
9086 if (!MemVT.isSimple())
9087 return SDValue();
9088
9089 // Before legalize we can introduce too wide illegal loads which will be later
9090 // split into legal sized loads. This enables us to combine i64 load by i8
9091 // patterns to a couple of i32 loads on 32 bit targets.
9092 if (LegalOperations &&
9093 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9094 MemVT))
9095 return SDValue();
9096
9097 // Check if the bytes of the OR we are looking at match with either big or
9098 // little endian value load
9099 std::optional<bool> IsBigEndian = isBigEndian(
9100 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9101 if (!IsBigEndian)
9102 return SDValue();
9103
9104 assert(FirstByteProvider && "must be set");
9105
9106 // Ensure that the first byte is loaded from zero offset of the first load.
9107 // So the combined value can be loaded from the first load address.
9108 if (MemoryByteOffset(*FirstByteProvider) != 0)
9109 return SDValue();
9110 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9111
9112 // The node we are looking at matches with the pattern, check if we can
9113 // replace it with a single (possibly zero-extended) load and bswap + shift if
9114 // needed.
9115
9116 // If the load needs byte swap check if the target supports it
9117 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9118
9119 // Before legalize we can introduce illegal bswaps which will be later
9120 // converted to an explicit bswap sequence. This way we end up with a single
9121 // load and byte shuffling instead of several loads and byte shuffling.
9122 // We do not introduce illegal bswaps when zero-extending as this tends to
9123 // introduce too many arithmetic instructions.
9124 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9125 !TLI.isOperationLegal(ISD::BSWAP, VT))
9126 return SDValue();
9127
9128 // If we need to bswap and zero extend, we have to insert a shift. Check that
9129 // it is legal.
9130 if (NeedsBswap && NeedsZext && LegalOperations &&
9131 !TLI.isOperationLegal(ISD::SHL, VT))
9132 return SDValue();
9133
9134 // Check that a load of the wide type is both allowed and fast on the target
9135 unsigned Fast = 0;
9136 bool Allowed =
9137 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9138 *FirstLoad->getMemOperand(), &Fast);
9139 if (!Allowed || !Fast)
9140 return SDValue();
9141
9142 SDValue NewLoad =
9143 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9144 Chain, FirstLoad->getBasePtr(),
9145 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9146
9147 // Transfer chain users from old loads to the new load.
9148 for (LoadSDNode *L : Loads)
9149 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9150
9151 if (!NeedsBswap)
9152 return NewLoad;
9153
9154 SDValue ShiftedLoad =
9155 NeedsZext
9156 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9157 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9158 SDLoc(N), LegalOperations))
9159 : NewLoad;
9160 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9161}
9162
9163// If the target has andn, bsl, or a similar bit-select instruction,
9164// we want to unfold masked merge, with canonical pattern of:
9165// | A | |B|
9166// ((x ^ y) & m) ^ y
9167// | D |
9168// Into:
9169// (x & m) | (y & ~m)
9170// If y is a constant, m is not a 'not', and the 'andn' does not work with
9171// immediates, we unfold into a different pattern:
9172// ~(~x & m) & (m | y)
9173// If x is a constant, m is a 'not', and the 'andn' does not work with
9174// immediates, we unfold into a different pattern:
9175// (x | ~m) & ~(~m & ~y)
9176// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9177// the very least that breaks andnpd / andnps patterns, and because those
9178// patterns are simplified in IR and shouldn't be created in the DAG
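// Illustrative 4-bit example (not from the source): with x = 0b1100,
// y = 0b1010, m = 0b0110, both ((x ^ y) & m) ^ y and (x & m) | (y & ~m)
// evaluate to 0b1100: m selects x's bits and ~m selects y's bits.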
9179SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9180 assert(N->getOpcode() == ISD::XOR);
9181
9182 // Don't touch 'not' (i.e. where y = -1).
9183 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9184 return SDValue();
9185
9186 EVT VT = N->getValueType(0);
9187
9188 // There are 3 commutable operators in the pattern,
9189 // so we have to deal with 8 possible variants of the basic pattern.
9190 SDValue X, Y, M;
9191 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9192 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9193 return false;
9194 SDValue Xor = And.getOperand(XorIdx);
9195 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9196 return false;
9197 SDValue Xor0 = Xor.getOperand(0);
9198 SDValue Xor1 = Xor.getOperand(1);
9199 // Don't touch 'not' (i.e. where y = -1).
9200 if (isAllOnesOrAllOnesSplat(Xor1))
9201 return false;
9202 if (Other == Xor0)
9203 std::swap(Xor0, Xor1);
9204 if (Other != Xor1)
9205 return false;
9206 X = Xor0;
9207 Y = Xor1;
9208 M = And.getOperand(XorIdx ? 0 : 1);
9209 return true;
9210 };
9211
9212 SDValue N0 = N->getOperand(0);
9213 SDValue N1 = N->getOperand(1);
9214 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9215 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9216 return SDValue();
9217
9218 // Don't do anything if the mask is constant. This should not be reachable.
9219 // InstCombine should have already unfolded this pattern, and DAGCombiner
9220 // probably shouldn't produce it, too.
9221 if (isa<ConstantSDNode>(M.getNode()))
9222 return SDValue();
9223
9224 // We can transform if the target has AndNot
9225 if (!TLI.hasAndNot(M))
9226 return SDValue();
9227
9228 SDLoc DL(N);
9229
9230 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9231 // a bitwise not that would already allow ANDN to be used.
9232 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9233 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9234 // If not, we need to do a bit more work to make sure andn is still used.
9235 SDValue NotX = DAG.getNOT(DL, X, VT);
9236 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9237 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9238 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9239 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9240 }
9241
9242 // If X is a constant and M is a bitwise not, check that 'andn' works with
9243 // immediates.
9244 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9245 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9246 // If not, we need to do a bit more work to make sure andn is still used.
9247 SDValue NotM = M.getOperand(0);
9248 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9249 SDValue NotY = DAG.getNOT(DL, Y, VT);
9250 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9251 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9252 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9253 }
9254
9255 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9256 SDValue NotM = DAG.getNOT(DL, M, VT);
9257 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9258
9259 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9260}
9261
9262SDValue DAGCombiner::visitXOR(SDNode *N) {
9263 SDValue N0 = N->getOperand(0);
9264 SDValue N1 = N->getOperand(1);
9265 EVT VT = N0.getValueType();
9266 SDLoc DL(N);
9267
9268 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9269 if (N0.isUndef() && N1.isUndef())
9270 return DAG.getConstant(0, DL, VT);
9271
9272 // fold (xor x, undef) -> undef
9273 if (N0.isUndef())
9274 return N0;
9275 if (N1.isUndef())
9276 return N1;
9277
9278 // fold (xor c1, c2) -> c1^c2
9279 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9280 return C;
9281
9282 // canonicalize constant to RHS
9283 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9284 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9285 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9286
9287 // fold vector ops
9288 if (VT.isVector()) {
9289 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9290 return FoldedVOp;
9291
9292 // fold (xor x, 0) -> x, vector edition
9293 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9294 return N0;
9295 }
9296
9297 // fold (xor x, 0) -> x
9298 if (isNullConstant(N1))
9299 return N0;
9300
9301 if (SDValue NewSel = foldBinOpIntoSelect(N))
9302 return NewSel;
9303
9304 // reassociate xor
9305 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9306 return RXOR;
9307
9308 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9309 if (SDValue SD =
9310 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9311 return SD;
9312
9313 // fold (a^b) -> (a|b) iff a and b share no bits.
9314 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9315 DAG.haveNoCommonBitsSet(N0, N1)) {
9316 SDNodeFlags Flags;
9317 Flags.setDisjoint(true);
9318 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9319 }
9320
9321 // look for 'add-like' folds:
9322 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9323 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9324 isMinSignedConstant(N1))
9325 if (SDValue Combined = visitADDLike(N))
9326 return Combined;
9327
9328 // fold !(x cc y) -> (x !cc y)
9329 unsigned N0Opcode = N0.getOpcode();
9330 SDValue LHS, RHS, CC;
9331 if (TLI.isConstTrueVal(N1) &&
9332 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9333 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9334 LHS.getValueType());
9335 if (!LegalOperations ||
9336 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9337 switch (N0Opcode) {
9338 default:
9339 llvm_unreachable("Unhandled SetCC Equivalent!");
9340 case ISD::SETCC:
9341 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9342 case ISD::SELECT_CC:
9343 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9344 N0.getOperand(3), NotCC);
9345 case ISD::STRICT_FSETCC:
9346 case ISD::STRICT_FSETCCS: {
9347 if (N0.hasOneUse()) {
9348 // FIXME Can we handle multiple uses? Could we token factor the chain
9349 // results from the new/old setcc?
9350 SDValue SetCC =
9351 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9352 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9353 CombineTo(N, SetCC);
9354 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9355 recursivelyDeleteUnusedNodes(N0.getNode());
9356 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9357 }
9358 break;
9359 }
9360 }
9361 }
9362 }
9363
9364 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9365 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9366 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9367 SDValue V = N0.getOperand(0);
9368 SDLoc DL0(N0);
9369 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9370 DAG.getConstant(1, DL0, V.getValueType()));
9371 AddToWorklist(V.getNode());
9372 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9373 }
9374
9375 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9376 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9377 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9378 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9379 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9380 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9381 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9382 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9383 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9384 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9385 }
9386 }
9387 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9388 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9389 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9390 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9391 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9392 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9393 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9394 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9395 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9396 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9397 }
9398 }
9399
9400 // fold (not (neg x)) -> (add X, -1)
9401 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9402 // Y is a constant or the subtract has a single use.
9403 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9404 isNullConstant(N0.getOperand(0))) {
9405 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9406 DAG.getAllOnesConstant(DL, VT));
9407 }
9408
9409 // fold (not (add X, -1)) -> (neg X)
9410 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9411       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9412     return DAG.getNegative(N0.getOperand(0), DL, VT);
9413 }
9414
9415 // fold (xor (and x, y), y) -> (and (not x), y)
9416 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9417 SDValue X = N0.getOperand(0);
9418 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9419 AddToWorklist(NotX.getNode());
9420 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9421 }
9422
9423 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9424 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9425 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9426 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9427 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9428 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9429 SDValue S0 = S.getOperand(0);
9430 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9431         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9432           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9433 return DAG.getNode(ISD::ABS, DL, VT, S0);
9434 }
9435 }
9436
9437 // fold (xor x, x) -> 0
9438 if (N0 == N1)
9439 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9440
9441 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9442 // Here is a concrete example of this equivalence:
9443 // i16 x == 14
9444 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9445 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9446 //
9447 // =>
9448 //
9449 // i16 ~1 == 0b1111111111111110
9450 // i16 rol(~1, 14) == 0b1011111111111111
9451 //
9452 // Some additional tips to help conceptualize this transform:
9453 // - Try to see the operation as placing a single zero in a value of all ones.
9454 // - There exists no value for x which would allow the result to contain zero.
9455 // - Values of x larger than the bitwidth are undefined and do not require a
9456 // consistent result.
9457 // - Pushing the zero left requires shifting one bits in from the right.
9458 // A rotate left of ~1 is a nice way of achieving the desired result.
9459 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9460       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9461     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9462 N0.getOperand(1));
9463 }
9464
9465 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9466 if (N0Opcode == N1.getOpcode())
9467 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9468 return V;
9469
9470 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9471 return R;
9472 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9473 return R;
9474 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9475 return R;
9476
9477 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9478 if (SDValue MM = unfoldMaskedMerge(N))
9479 return MM;
9480
9481 // Simplify the expression using non-local knowledge.
9482   if (SimplifyDemandedBits(SDValue(N, 0)))
9483     return SDValue(N, 0);
9484
9485 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9486 return Combined;
9487
9488 return SDValue();
9489}
9490
9491/// If we have a shift-by-constant of a bitwise logic op that itself has a
9492/// shift-by-constant operand with identical opcode, we may be able to convert
9493/// that into 2 independent shifts followed by the logic op. This is a
9494/// throughput improvement.
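/// For example (illustrative):
///   shl (and (shl X, 2), Y), 3 --> and (shl X, 5), (shl Y, 3)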
9495 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9496   // Match a one-use bitwise logic op.
9497 SDValue LogicOp = Shift->getOperand(0);
9498 if (!LogicOp.hasOneUse())
9499 return SDValue();
9500
9501 unsigned LogicOpcode = LogicOp.getOpcode();
9502 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9503 LogicOpcode != ISD::XOR)
9504 return SDValue();
9505
9506 // Find a matching one-use shift by constant.
9507 unsigned ShiftOpcode = Shift->getOpcode();
9508 SDValue C1 = Shift->getOperand(1);
9509 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9510 assert(C1Node && "Expected a shift with constant operand");
9511 const APInt &C1Val = C1Node->getAPIntValue();
9512 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9513 const APInt *&ShiftAmtVal) {
9514 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9515 return false;
9516
9517 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9518 if (!ShiftCNode)
9519 return false;
9520
9521 // Capture the shifted operand and shift amount value.
9522 ShiftOp = V.getOperand(0);
9523 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9524
9525 // Shift amount types do not have to match their operand type, so check that
9526 // the constants are the same width.
9527 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9528 return false;
9529
9530 // The fold is not valid if the sum of the shift values doesn't fit in the
9531 // given shift amount type.
9532 bool Overflow = false;
9533 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9534 if (Overflow)
9535 return false;
9536
9537 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9538 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9539 return false;
9540
9541 return true;
9542 };
9543
9544 // Logic ops are commutative, so check each operand for a match.
9545 SDValue X, Y;
9546 const APInt *C0Val;
9547 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9548 Y = LogicOp.getOperand(1);
9549 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9550 Y = LogicOp.getOperand(0);
9551 else
9552 return SDValue();
9553
9554 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9555 SDLoc DL(Shift);
9556 EVT VT = Shift->getValueType(0);
9557 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9558 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9559 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9560 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9561 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9562 LogicOp->getFlags());
9563}
9564
9565/// Handle transforms common to the three shifts, when the shift amount is a
9566/// constant.
9567/// We are looking for: (shift being one of shl/sra/srl)
9568/// shift (binop X, C0), C1
9569/// And want to transform into:
9570/// binop (shift X, C1), (shift C0, C1)
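/// For example (illustrative): shl (add X, 4), 2 --> add (shl X, 2), 16.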
9571SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9572 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9573
9574 // Do not turn a 'not' into a regular xor.
9575 if (isBitwiseNot(N->getOperand(0)))
9576 return SDValue();
9577
9578 // The inner binop must be one-use, since we want to replace it.
9579 SDValue LHS = N->getOperand(0);
9580 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9581 return SDValue();
9582
9583 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9584 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9585 return R;
9586
9587 // We want to pull some binops through shifts, so that we have (and (shift))
9588 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9589 // thing happens with address calculations, so it's important to canonicalize
9590 // it.
9591 switch (LHS.getOpcode()) {
9592 default:
9593 return SDValue();
9594 case ISD::OR:
9595 case ISD::XOR:
9596 case ISD::AND:
9597 break;
9598 case ISD::ADD:
9599 if (N->getOpcode() != ISD::SHL)
9600 return SDValue(); // only shl(add) not sr[al](add).
9601 break;
9602 }
9603
9604 // FIXME: disable this unless the input to the binop is a shift by a constant
9605   // or is copy/select. Enable this in other cases once we figure out when it's
9606   // exactly profitable.
9607 SDValue BinOpLHSVal = LHS.getOperand(0);
9608 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9609 BinOpLHSVal.getOpcode() == ISD::SRA ||
9610 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9611 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9612 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9613 BinOpLHSVal.getOpcode() == ISD::SELECT;
9614
9615 if (!IsShiftByConstant && !IsCopyOrSelect)
9616 return SDValue();
9617
9618 if (IsCopyOrSelect && N->hasOneUse())
9619 return SDValue();
9620
9621 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9622 SDLoc DL(N);
9623 EVT VT = N->getValueType(0);
9624 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9625 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9626 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9627 N->getOperand(1));
9628 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9629 }
9630
9631 return SDValue();
9632}
9633
9634SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9635 assert(N->getOpcode() == ISD::TRUNCATE);
9636 assert(N->getOperand(0).getOpcode() == ISD::AND);
9637
9638 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
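  // For example (illustrative), truncating i32 -> i16:
  //   (trunc (and X, 0x00FF00FF)) -> (and (trunc X), 0x00FF)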
9639 EVT TruncVT = N->getValueType(0);
9640 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9641 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9642 SDValue N01 = N->getOperand(0).getOperand(1);
9643 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9644 SDLoc DL(N);
9645 SDValue N00 = N->getOperand(0).getOperand(0);
9646 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9647 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9648 AddToWorklist(Trunc00.getNode());
9649 AddToWorklist(Trunc01.getNode());
9650 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9651 }
9652 }
9653
9654 return SDValue();
9655}
9656
9657SDValue DAGCombiner::visitRotate(SDNode *N) {
9658 SDLoc dl(N);
9659 SDValue N0 = N->getOperand(0);
9660 SDValue N1 = N->getOperand(1);
9661 EVT VT = N->getValueType(0);
9662 unsigned Bitsize = VT.getScalarSizeInBits();
9663
9664 // fold (rot x, 0) -> x
9665 if (isNullOrNullSplat(N1))
9666 return N0;
9667
9668 // fold (rot x, c) -> x iff (c % BitSize) == 0
9669 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9670 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9671 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9672 return N0;
9673 }
9674
9675 // fold (rot x, c) -> (rot x, c % BitSize)
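  // For example (illustrative, i8): (rotl x, 11) -> (rotl x, 3).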
9676 bool OutOfRange = false;
9677 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9678 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9679 return true;
9680 };
9681 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9682 EVT AmtVT = N1.getValueType();
9683 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9684 if (SDValue Amt =
9685 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9686 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9687 }
9688
9689 // rot i16 X, 8 --> bswap X
9690 auto *RotAmtC = isConstOrConstSplat(N1);
9691 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9692 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9693 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9694
9695 // Simplify the operands using demanded-bits information.
9696   if (SimplifyDemandedBits(SDValue(N, 0)))
9697     return SDValue(N, 0);
9698
9699 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9700 if (N1.getOpcode() == ISD::TRUNCATE &&
9701 N1.getOperand(0).getOpcode() == ISD::AND) {
9702 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9703 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9704 }
9705
9706 unsigned NextOp = N0.getOpcode();
9707
9708 // fold (rot* (rot* x, c2), c1)
9709 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
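  // For example (illustrative, i8): rotl (rotl x, 3), 6 -> rotl x, 1, and
  // rotl (rotr x, 3), 6 -> rotl x, 3.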
9710 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9711     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9712     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9713     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9714 EVT ShiftVT = C1->getValueType(0);
9715 bool SameSide = (N->getOpcode() == NextOp);
9716 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9717 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9718 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9719 {N1, BitsizeC});
9720 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9721 {N0.getOperand(1), BitsizeC});
9722 if (Norm1 && Norm2)
9723 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9724 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9725 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9726 {CombinedShift, BitsizeC});
9727 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9728 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9729 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9730 CombinedShiftNorm);
9731 }
9732 }
9733 }
9734 return SDValue();
9735}
9736
9737SDValue DAGCombiner::visitSHL(SDNode *N) {
9738 SDValue N0 = N->getOperand(0);
9739 SDValue N1 = N->getOperand(1);
9740 if (SDValue V = DAG.simplifyShift(N0, N1))
9741 return V;
9742
9743 EVT VT = N0.getValueType();
9744 EVT ShiftVT = N1.getValueType();
9745 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9746
9747 // fold (shl c1, c2) -> c1<<c2
9748 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
9749 return C;
9750
9751 // fold vector ops
9752 if (VT.isVector()) {
9753 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9754 return FoldedVOp;
9755
9756 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9757 // If setcc produces all-one true value then:
9758 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9759 if (N1CV && N1CV->isConstant()) {
9760 if (N0.getOpcode() == ISD::AND) {
9761 SDValue N00 = N0->getOperand(0);
9762 SDValue N01 = N0->getOperand(1);
9763 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9764
9765 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9766             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9767                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9768         if (SDValue C =
9769 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
9770 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
9771 }
9772 }
9773 }
9774 }
9775
9776 if (SDValue NewSel = foldBinOpIntoSelect(N))
9777 return NewSel;
9778
9779 // if (shl x, c) is known to be zero, return 0
9780 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9781 return DAG.getConstant(0, SDLoc(N), VT);
9782
9783 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9784 if (N1.getOpcode() == ISD::TRUNCATE &&
9785 N1.getOperand(0).getOpcode() == ISD::AND) {
9786 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9787 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
9788 }
9789
9790 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
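  // For example (illustrative, i8): shl (shl x, 3), 2 -> shl x, 5, while
  // shl (shl x, 6), 3 -> 0 because every bit is shifted out.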
9791 if (N0.getOpcode() == ISD::SHL) {
9792 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9793                                         ConstantSDNode *RHS) {
9794       APInt c1 = LHS->getAPIntValue();
9795 APInt c2 = RHS->getAPIntValue();
9796 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9797 return (c1 + c2).uge(OpSizeInBits);
9798 };
9799 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9800 return DAG.getConstant(0, SDLoc(N), VT);
9801
9802 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9803                                       ConstantSDNode *RHS) {
9804       APInt c1 = LHS->getAPIntValue();
9805 APInt c2 = RHS->getAPIntValue();
9806 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9807 return (c1 + c2).ult(OpSizeInBits);
9808 };
9809 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9810 SDLoc DL(N);
9811 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9812 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9813 }
9814 }
9815
9816 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9817 // For this to be valid, the second form must not preserve any of the bits
9818 // that are shifted out by the inner shift in the first form. This means
9819 // the outer shift size must be >= the number of bits added by the ext.
9820 // As a corollary, we don't care what kind of ext it is.
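  // For example (illustrative): shl (zext i16 (shl X, 4) to i32), 20
  //   -> shl (zext i16 X to i32), 24
  // which is valid because the outer shift (20) is >= the 16 bits added by zext.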
9821 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9822 N0.getOpcode() == ISD::ANY_EXTEND ||
9823 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9824 N0.getOperand(0).getOpcode() == ISD::SHL) {
9825 SDValue N0Op0 = N0.getOperand(0);
9826 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9827 EVT InnerVT = N0Op0.getValueType();
9828 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9829
9830 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9831                                                           ConstantSDNode *RHS) {
9832       APInt c1 = LHS->getAPIntValue();
9833 APInt c2 = RHS->getAPIntValue();
9834 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9835 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9836 (c1 + c2).uge(OpSizeInBits);
9837 };
9838 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9839 /*AllowUndefs*/ false,
9840 /*AllowTypeMismatch*/ true))
9841 return DAG.getConstant(0, SDLoc(N), VT);
9842
9843 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9844                                                         ConstantSDNode *RHS) {
9845       APInt c1 = LHS->getAPIntValue();
9846 APInt c2 = RHS->getAPIntValue();
9847 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9848 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9849 (c1 + c2).ult(OpSizeInBits);
9850 };
9851 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9852 /*AllowUndefs*/ false,
9853 /*AllowTypeMismatch*/ true)) {
9854 SDLoc DL(N);
9855 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9856 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9857 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9858 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9859 }
9860 }
9861
9862 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9863 // Only fold this if the inner zext has no other uses to avoid increasing
9864 // the total number of instructions.
9865 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9866 N0.getOperand(0).getOpcode() == ISD::SRL) {
9867 SDValue N0Op0 = N0.getOperand(0);
9868 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9869
9870 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9871 APInt c1 = LHS->getAPIntValue();
9872 APInt c2 = RHS->getAPIntValue();
9873 zeroExtendToMatch(c1, c2);
9874 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9875 };
9876 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9877 /*AllowUndefs*/ false,
9878 /*AllowTypeMismatch*/ true)) {
9879 SDLoc DL(N);
9880 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9881 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9882 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9883 AddToWorklist(NewSHL.getNode());
9884 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9885 }
9886 }
9887
9888 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9889 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9890                                            ConstantSDNode *RHS) {
9891       const APInt &LHSC = LHS->getAPIntValue();
9892 const APInt &RHSC = RHS->getAPIntValue();
9893 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9894 LHSC.getZExtValue() <= RHSC.getZExtValue();
9895 };
9896
9897 SDLoc DL(N);
9898
9899 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9900 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
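    // For example (illustrative): shl (srl exact X, 2), 5 -> shl X, 3, since the
    // exact flag guarantees the low 2 bits of X are zero.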
9901 if (N0->getFlags().hasExact()) {
9902 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9903 /*AllowUndefs*/ false,
9904 /*AllowTypeMismatch*/ true)) {
9905 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9906 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9907 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9908 }
9909 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9910 /*AllowUndefs*/ false,
9911 /*AllowTypeMismatch*/ true)) {
9912 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9913 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9914 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9915 }
9916 }
9917
9918 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
9919 // (and (srl x, (sub c1, c2), MASK)
9920 // Only fold this if the inner shift has no other uses -- if it does,
9921 // folding this will increase the total number of instructions.
9922 if (N0.getOpcode() == ISD::SRL &&
9923 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9924         TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9925       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9926 /*AllowUndefs*/ false,
9927 /*AllowTypeMismatch*/ true)) {
9928 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9929 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9930 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9931 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9932 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9933 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9934 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9935 }
9936 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9937 /*AllowUndefs*/ false,
9938 /*AllowTypeMismatch*/ true)) {
9939 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9940 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9941 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9942 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9943 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9944 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9945 }
9946 }
9947 }
9948
9949 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
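  // For example (illustrative, i8): shl (sra x, 3), 3 -> and x, 0xF8.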
9950 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9951 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9952 SDLoc DL(N);
9953 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9954 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9955 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9956 }
9957
9958 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9959 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9960 // Variant of version done on multiply, except mul by a power of 2 is turned
9961 // into a shift.
9962 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9963 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
9964 SDValue N01 = N0.getOperand(1);
9965 if (SDValue Shl1 =
9966 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
9967 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9968 AddToWorklist(Shl0.getNode());
9969       SDNodeFlags Flags;
9970       // Preserve the disjoint flag for Or.
9971 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
9972 Flags.setDisjoint(true);
9973 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
9974 }
9975 }
9976
9977 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
9978 // TODO: Add zext/add_nuw variant with suitable test coverage
9979 // TODO: Should we limit this with isLegalAddImmediate?
9980 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
9981 N0.getOperand(0).getOpcode() == ISD::ADD &&
9982 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
9983 N0.getOperand(0)->hasOneUse() &&
9984 TLI.isDesirableToCommuteWithShift(N, Level)) {
9985 SDValue Add = N0.getOperand(0);
9986 SDLoc DL(N0);
9987 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
9988 {Add.getOperand(1)})) {
9989 if (SDValue ShlC =
9990 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
9991 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
9992 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
9993 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
9994 }
9995 }
9996 }
9997
9998 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9999 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10000 SDValue N01 = N0.getOperand(1);
10001 if (SDValue Shl =
10002 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10003 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
10004 }
10005
10006   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10007   if (N1C && !N1C->isOpaque())
10008 if (SDValue NewSHL = visitShiftByConstant(N))
10009 return NewSHL;
10010
10011   if (SimplifyDemandedBits(SDValue(N, 0)))
10012     return SDValue(N, 0);
10013
10014 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10015 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10016 const APInt &C0 = N0.getConstantOperandAPInt(0);
10017 const APInt &C1 = N1C->getAPIntValue();
10018 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
10019 }
10020
10021 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10022 APInt ShlVal;
10023 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10024 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10025 const APInt &C0 = N0.getConstantOperandAPInt(0);
10026 if (ShlVal.ult(C0.getBitWidth())) {
10027 APInt NewStep = C0 << ShlVal;
10028 return DAG.getStepVector(SDLoc(N), VT, NewStep);
10029 }
10030 }
10031
10032 return SDValue();
10033}
10034
10035// Transform a right shift of a multiply into a multiply-high.
10036// Examples:
10037 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10038 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10039 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
10040                                   const TargetLowering &TLI) {
10041 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10042 "SRL or SRA node is required here!");
10043
10044 // Check the shift amount. Proceed with the transformation if the shift
10045 // amount is constant.
10046 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10047 if (!ShiftAmtSrc)
10048 return SDValue();
10049
10050 SDLoc DL(N);
10051
10052 // The operation feeding into the shift must be a multiply.
10053 SDValue ShiftOperand = N->getOperand(0);
10054 if (ShiftOperand.getOpcode() != ISD::MUL)
10055 return SDValue();
10056
10057 // Both operands must be equivalent extend nodes.
10058 SDValue LeftOp = ShiftOperand.getOperand(0);
10059 SDValue RightOp = ShiftOperand.getOperand(1);
10060
10061 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10062 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10063
10064 if (!IsSignExt && !IsZeroExt)
10065 return SDValue();
10066
10067 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10068 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10069
10070 // return true if U may use the lower bits of its operands
10071 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10072 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10073 return true;
10074 }
10075 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10076 if (!UShiftAmtSrc) {
10077 return true;
10078 }
10079 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10080 return UShiftAmt < NarrowVTSize;
10081 };
10082
10083 // If the lower part of the MUL is also used and MUL_LOHI is supported
10084 // do not introduce the MULH in favor of MUL_LOHI
10085 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10086 if (!ShiftOperand.hasOneUse() &&
10087 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10088 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10089 return SDValue();
10090 }
10091
10092 SDValue MulhRightOp;
10093   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10094     unsigned ActiveBits = IsSignExt
10095 ? Constant->getAPIntValue().getSignificantBits()
10096 : Constant->getAPIntValue().getActiveBits();
10097 if (ActiveBits > NarrowVTSize)
10098 return SDValue();
10099 MulhRightOp = DAG.getConstant(
10100 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10101 NarrowVT);
10102 } else {
10103 if (LeftOp.getOpcode() != RightOp.getOpcode())
10104 return SDValue();
10105 // Check that the two extend nodes are the same type.
10106 if (NarrowVT != RightOp.getOperand(0).getValueType())
10107 return SDValue();
10108 MulhRightOp = RightOp.getOperand(0);
10109 }
10110
10111 EVT WideVT = LeftOp.getValueType();
10112 // Proceed with the transformation if the wide types match.
10113 assert((WideVT == RightOp.getValueType()) &&
10114 "Cannot have a multiply node with two different operand types.");
10115
10116 // Proceed with the transformation if the wide type is twice as large
10117 // as the narrow type.
10118 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10119 return SDValue();
10120
10121 // Check the shift amount with the narrow type size.
10122 // Proceed with the transformation if the shift amount is the width
10123 // of the narrow type.
10124 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10125 if (ShiftAmt != NarrowVTSize)
10126 return SDValue();
10127
10128 // If the operation feeding into the MUL is a sign extend (sext),
10129   // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10130 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10131
10132 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10133 // or if it is a vector type then we could transform to an acceptable type and
10134 // rely on legalization to split/combine the result.
10135 if (NarrowVT.isVector()) {
10136 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10137 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10138 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10139 return SDValue();
10140 } else {
10141 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10142 return SDValue();
10143 }
10144
10145 SDValue Result =
10146 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10147 bool IsSigned = N->getOpcode() == ISD::SRA;
10148 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10149}
10150
10151// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10152// This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
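// For example (illustrative): bswap (xor (bswap X), Y) -> xor X, (bswap Y),
// because the reordering is an involution and commutes with bitwise logic.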
10153 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10154   unsigned Opcode = N->getOpcode();
10155 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10156 return SDValue();
10157
10158 SDValue N0 = N->getOperand(0);
10159 EVT VT = N->getValueType(0);
10160 SDLoc DL(N);
10161 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10162 SDValue OldLHS = N0.getOperand(0);
10163 SDValue OldRHS = N0.getOperand(1);
10164
10165     // If both operands are bswap/bitreverse, ignore the multiuse restriction.
10166     // Otherwise we need to ensure the logic_op and bswap/bitreverse(x) have one use.
10167 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10168 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10169 OldRHS.getOperand(0));
10170 }
10171
10172 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10173 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10174 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10175 NewBitReorder);
10176 }
10177
10178 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10179 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10180 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10181 OldRHS.getOperand(0));
10182 }
10183 }
10184 return SDValue();
10185}
10186
10187SDValue DAGCombiner::visitSRA(SDNode *N) {
10188 SDValue N0 = N->getOperand(0);
10189 SDValue N1 = N->getOperand(1);
10190 if (SDValue V = DAG.simplifyShift(N0, N1))
10191 return V;
10192
10193 EVT VT = N0.getValueType();
10194 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10195
10196   // fold (sra c1, c2) -> c1>>c2
10197 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
10198 return C;
10199
10200 // Arithmetic shifting an all-sign-bit value is a no-op.
10201 // fold (sra 0, x) -> 0
10202 // fold (sra -1, x) -> -1
10203 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10204 return N0;
10205
10206 // fold vector ops
10207 if (VT.isVector())
10208 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10209 return FoldedVOp;
10210
10211 if (SDValue NewSel = foldBinOpIntoSelect(N))
10212 return NewSel;
10213
10213
10214   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10215
10216 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10217 // clamp (add c1, c2) to max shift.
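  // For example (illustrative, i8): sra (sra x, 3), 2 -> sra x, 5, and
  // sra (sra x, 6), 5 -> sra x, 7 (the sum 11 is clamped to bitwidth - 1).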
10218 if (N0.getOpcode() == ISD::SRA) {
10219 SDLoc DL(N);
10220 EVT ShiftVT = N1.getValueType();
10221 EVT ShiftSVT = ShiftVT.getScalarType();
10222 SmallVector<SDValue, 16> ShiftValues;
10223
10224 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10225 APInt c1 = LHS->getAPIntValue();
10226 APInt c2 = RHS->getAPIntValue();
10227 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10228 APInt Sum = c1 + c2;
10229 unsigned ShiftSum =
10230 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10231 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10232 return true;
10233 };
10234 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10235 SDValue ShiftValue;
10236 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10237 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10238 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10239 assert(ShiftValues.size() == 1 &&
10240 "Expected matchBinaryPredicate to return one element for "
10241 "SPLAT_VECTORs");
10242 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10243 } else
10244 ShiftValue = ShiftValues[0];
10245 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10246 }
10247 }
10248
10249 // fold (sra (shl X, m), (sub result_size, n))
10250 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10251 // result_size - n != m.
10252   // If truncate is free for the target, sext(shl) is likely to result in better
10253 // code.
10254 if (N0.getOpcode() == ISD::SHL && N1C) {
10255 // Get the two constants of the shifts, CN0 = m, CN = n.
10256 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10257 if (N01C) {
10258 LLVMContext &Ctx = *DAG.getContext();
10259 // Determine what the truncate's result bitsize and type would be.
10260 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10261
10262 if (VT.isVector())
10263 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10264
10265 // Determine the residual right-shift amount.
10266 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10267
10268 // If the shift is not a no-op (in which case this should be just a sign
10269 // extend already), the truncated to type is legal, sign_extend is legal
10270 // on that type, and the truncate to that type is both legal and free,
10271 // perform the transform.
10272 if ((ShiftAmt > 0) &&
10273           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10274           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10275           TLI.isTruncateFree(VT, TruncVT)) {
10276 SDLoc DL(N);
10277           SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10278               getShiftAmountTy(N0.getOperand(0).getValueType()));
10279 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10280 N0.getOperand(0), Amt);
10281 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10282 Shift);
10283 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10284 N->getValueType(0), Trunc);
10285 }
10286 }
10287 }
10288
10289 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10290 // sra (add (shl X, N1C), AddC), N1C -->
10291 // sext (add (trunc X to (width - N1C)), AddC')
10292 // sra (sub AddC, (shl X, N1C)), N1C -->
10293 // sext (sub AddC1',(trunc X to (width - N1C)))
10294 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10295 N0.hasOneUse()) {
10296 bool IsAdd = N0.getOpcode() == ISD::ADD;
10297 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10298 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10299 Shl.hasOneUse()) {
10300 // TODO: AddC does not need to be a splat.
10301 if (ConstantSDNode *AddC =
10302 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10303 // Determine what the truncate's type would be and ask the target if
10304 // that is a free operation.
10305 LLVMContext &Ctx = *DAG.getContext();
10306 unsigned ShiftAmt = N1C->getZExtValue();
10307 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10308 if (VT.isVector())
10309 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10310
10311 // TODO: The simple type check probably belongs in the default hook
10312 // implementation and/or target-specific overrides (because
10313 // non-simple types likely require masking when legalized), but
10314 // that restriction may conflict with other transforms.
10315 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10316 TLI.isTruncateFree(VT, TruncVT)) {
10317 SDLoc DL(N);
10318 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10319 SDValue ShiftC =
10320 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10321 TruncVT.getScalarSizeInBits()),
10322 DL, TruncVT);
10323 SDValue Add;
10324 if (IsAdd)
10325 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10326 else
10327 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10328 return DAG.getSExtOrTrunc(Add, DL, VT);
10329 }
10330 }
10331 }
10332 }
10333
10334 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10335 if (N1.getOpcode() == ISD::TRUNCATE &&
10336 N1.getOperand(0).getOpcode() == ISD::AND) {
10337 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10338 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
10339 }
10340
10341 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10342 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10343 // if c1 is equal to the number of bits the trunc removes
10344 // TODO - support non-uniform vector shift amounts.
10345 if (N0.getOpcode() == ISD::TRUNCATE &&
10346 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10347 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10348 N0.getOperand(0).hasOneUse() &&
10349 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10350 SDValue N0Op0 = N0.getOperand(0);
10351 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10352 EVT LargeVT = N0Op0.getValueType();
10353 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10354 if (LargeShift->getAPIntValue() == TruncBits) {
10355 SDLoc DL(N);
10356 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10357 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10358 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10359 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10360 SDValue SRA =
10361 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10362 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10363 }
10364 }
10365 }
10366
10367 // Simplify, based on bits shifted out of the LHS.
10368   if (SimplifyDemandedBits(SDValue(N, 0)))
10369     return SDValue(N, 0);
10370
10371 // If the sign bit is known to be zero, switch this to a SRL.
10372 if (DAG.SignBitIsZero(N0))
10373 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
10374
10375 if (N1C && !N1C->isOpaque())
10376 if (SDValue NewSRA = visitShiftByConstant(N))
10377 return NewSRA;
10378
10379 // Try to transform this shift into a multiply-high if
10380 // it matches the appropriate pattern detected in combineShiftToMULH.
10381 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10382 return MULH;
10383
10384 // Attempt to convert a sra of a load into a narrower sign-extending load.
10385 if (SDValue NarrowLoad = reduceLoadWidth(N))
10386 return NarrowLoad;
10387
10388 return SDValue();
10389}
10390
10391SDValue DAGCombiner::visitSRL(SDNode *N) {
10392 SDValue N0 = N->getOperand(0);
10393 SDValue N1 = N->getOperand(1);
10394 if (SDValue V = DAG.simplifyShift(N0, N1))
10395 return V;
10396
10397 EVT VT = N0.getValueType();
10398 EVT ShiftVT = N1.getValueType();
10399 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10400
10401 // fold (srl c1, c2) -> c1 >>u c2
10402 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
10403 return C;
10404
10405 // fold vector ops
10406 if (VT.isVector())
10407 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10408 return FoldedVOp;
10409
10410 if (SDValue NewSel = foldBinOpIntoSelect(N))
10411 return NewSel;
10412
10413 // if (srl x, c) is known to be zero, return 0
10414   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10415   if (N1C &&
10416 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10417 return DAG.getConstant(0, SDLoc(N), VT);
10418
10419 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10420 if (N0.getOpcode() == ISD::SRL) {
10421 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10422                                          ConstantSDNode *RHS) {
10423       APInt c1 = LHS->getAPIntValue();
10424 APInt c2 = RHS->getAPIntValue();
10425 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10426 return (c1 + c2).uge(OpSizeInBits);
10427 };
10428 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10429 return DAG.getConstant(0, SDLoc(N), VT);
10430
10431 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10432                                         ConstantSDNode *RHS) {
10433       APInt c1 = LHS->getAPIntValue();
10434 APInt c2 = RHS->getAPIntValue();
10435 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10436 return (c1 + c2).ult(OpSizeInBits);
10437 };
10438 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10439 SDLoc DL(N);
10440 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10441 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10442 }
10443 }
10444
10445 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10446 N0.getOperand(0).getOpcode() == ISD::SRL) {
10447 SDValue InnerShift = N0.getOperand(0);
10448 // TODO - support non-uniform vector shift amounts.
10449 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10450 uint64_t c1 = N001C->getZExtValue();
10451 uint64_t c2 = N1C->getZExtValue();
10452 EVT InnerShiftVT = InnerShift.getValueType();
10453 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10454 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10455 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10456 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10457 if (c1 + OpSizeInBits == InnerShiftSize) {
10458 SDLoc DL(N);
10459 if (c1 + c2 >= InnerShiftSize)
10460 return DAG.getConstant(0, DL, VT);
10461 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10462 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10463 InnerShift.getOperand(0), NewShiftAmt);
10464 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10465 }
10466 // In the more general case, we can clear the high bits after the shift:
10467 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10468 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10469 c1 + c2 < InnerShiftSize) {
10470 SDLoc DL(N);
10471 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10472 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10473 InnerShift.getOperand(0), NewShiftAmt);
10474 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10475 OpSizeInBits - c2),
10476 DL, InnerShiftVT);
10477 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10478 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10479 }
10480 }
10481 }
10482
10483 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
10484 // (and (srl x, (sub c2, c1), MASK)
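  // For example (illustrative, i8): srl (shl x, 5), 2 -> and (shl x, 3), 0x38.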
10485 if (N0.getOpcode() == ISD::SHL &&
10486 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10487       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10488     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10489                                            ConstantSDNode *RHS) {
10490       const APInt &LHSC = LHS->getAPIntValue();
10491 const APInt &RHSC = RHS->getAPIntValue();
10492 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10493 LHSC.getZExtValue() <= RHSC.getZExtValue();
10494 };
10495 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10496 /*AllowUndefs*/ false,
10497 /*AllowTypeMismatch*/ true)) {
10498 SDLoc DL(N);
10499 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10500 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10501 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10502 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10503 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10504 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10505 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10506 }
10507 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10508 /*AllowUndefs*/ false,
10509 /*AllowTypeMismatch*/ true)) {
10510 SDLoc DL(N);
10511 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10512 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10513 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10514 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10515 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10516 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10517 }
10518 }
10519
10520 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10521 // TODO - support non-uniform vector shift amounts.
10522 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10523 // Shifting in all undef bits?
10524 EVT SmallVT = N0.getOperand(0).getValueType();
10525 unsigned BitSize = SmallVT.getScalarSizeInBits();
10526 if (N1C->getAPIntValue().uge(BitSize))
10527 return DAG.getUNDEF(VT);
10528
10529 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10530 uint64_t ShiftAmt = N1C->getZExtValue();
10531 SDLoc DL0(N0);
10532 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10533 N0.getOperand(0),
10534 DAG.getConstant(ShiftAmt, DL0,
10535 getShiftAmountTy(SmallVT)));
10536 AddToWorklist(SmallShift.getNode());
10537 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10538 SDLoc DL(N);
10539 return DAG.getNode(ISD::AND, DL, VT,
10540 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10541 DAG.getConstant(Mask, DL, VT));
10542 }
10543 }
10544
10545 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10546 // bit, which is unmodified by sra.
10547 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10548 if (N0.getOpcode() == ISD::SRA)
10549 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
10550 }
10551
10552 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10553 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10554 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10555 isPowerOf2_32(OpSizeInBits) &&
10556 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10557 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10558
10559 // If any of the input bits are KnownOne, then the input couldn't be all
10560 // zeros, thus the result of the srl will always be zero.
10561 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10562
10563     // If all of the bits input to the ctlz node are known to be zero, then
10564 // the result of the ctlz is "32" and the result of the shift is one.
10565 APInt UnknownBits = ~Known.Zero;
10566 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10567
10568 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10569 if (UnknownBits.isPowerOf2()) {
10570       // Okay, we know that only the single bit specified by UnknownBits
10571 // could be set on input to the CTLZ node. If this bit is set, the SRL
10572 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10573 // to an SRL/XOR pair, which is likely to simplify more.
10574 unsigned ShAmt = UnknownBits.countr_zero();
10575 SDValue Op = N0.getOperand(0);
10576
10577 if (ShAmt) {
10578 SDLoc DL(N0);
10579 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10580 DAG.getConstant(ShAmt, DL,
10581 getShiftAmountTy(Op.getValueType())));
10582 AddToWorklist(Op.getNode());
10583 }
10584
10585 SDLoc DL(N);
10586 return DAG.getNode(ISD::XOR, DL, VT,
10587 Op, DAG.getConstant(1, DL, VT));
10588 }
10589 }
10590
10591 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10592 if (N1.getOpcode() == ISD::TRUNCATE &&
10593 N1.getOperand(0).getOpcode() == ISD::AND) {
10594 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10595 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
10596 }
10597
10598 // fold operands of srl based on knowledge that the low bits are not
10599 // demanded.
10600   if (SimplifyDemandedBits(SDValue(N, 0)))
10601     return SDValue(N, 0);
10602
10603 if (N1C && !N1C->isOpaque())
10604 if (SDValue NewSRL = visitShiftByConstant(N))
10605 return NewSRL;
10606
10607 // Attempt to convert a srl of a load into a narrower zero-extending load.
10608 if (SDValue NarrowLoad = reduceLoadWidth(N))
10609 return NarrowLoad;
10610
10611 // Here is a common situation. We want to optimize:
10612 //
10613 // %a = ...
10614 // %b = and i32 %a, 2
10615 // %c = srl i32 %b, 1
10616 // brcond i32 %c ...
10617 //
10618 // into
10619 //
10620 // %a = ...
10621 // %b = and %a, 2
10622 // %c = setcc eq %b, 0
10623 // brcond %c ...
10624 //
10625   // However, after the source operand of SRL is optimized into AND, the SRL
10626 // itself may not be optimized further. Look for it and add the BRCOND into
10627 // the worklist.
10628 //
10629   // This also tends to happen for binary operations when SimplifyDemandedBits
10630 // is involved.
10631 //
10632   // FIXME: This is unnecessary if we process the DAG in topological order,
10633 // which we plan to do. This workaround can be removed once the DAG is
10634 // processed in topological order.
10635 if (N->hasOneUse()) {
10636 SDNode *Use = *N->use_begin();
10637
10638     // Look past the truncate.
10639 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10640 Use = *Use->use_begin();
10641
10642 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10643 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10644 AddToWorklist(Use);
10645 }
10646
10647 // Try to transform this shift into a multiply-high if
10648 // it matches the appropriate pattern detected in combineShiftToMULH.
10649 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10650 return MULH;
10651
10652 return SDValue();
10653}
10654
10655SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10656 EVT VT = N->getValueType(0);
10657 SDValue N0 = N->getOperand(0);
10658 SDValue N1 = N->getOperand(1);
10659 SDValue N2 = N->getOperand(2);
10660 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10661 unsigned BitWidth = VT.getScalarSizeInBits();
10662
10663 // fold (fshl N0, N1, 0) -> N0
10664 // fold (fshr N0, N1, 0) -> N1
10665   if (isPowerOf2_32(BitWidth) &&
10666       DAG.MaskedValueIsZero(
10667 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10668 return IsFSHL ? N0 : N1;
10669
10670 auto IsUndefOrZero = [](SDValue V) {
10671 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10672 };
10673
10674 // TODO - support non-uniform vector shift amounts.
10675 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10676 EVT ShAmtTy = N2.getValueType();
10677
10678 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10679 if (Cst->getAPIntValue().uge(BitWidth)) {
10680 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10681 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10682 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10683 }
10684
10685 unsigned ShAmt = Cst->getZExtValue();
10686 if (ShAmt == 0)
10687 return IsFSHL ? N0 : N1;
10688
10689 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10690 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10691 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10692 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
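    // For example (illustrative, i8): fshl(x, 0, 3) == (x << 3) | (0 >> 5) == shl(x, 3),
    // and fshr(0, y, 3) == (0 << 5) | (y >> 3) == srl(y, 3).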
10693 if (IsUndefOrZero(N0))
10694 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10695 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10696 SDLoc(N), ShAmtTy));
10697 if (IsUndefOrZero(N1))
10698 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10699 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10700 SDLoc(N), ShAmtTy));
10701
10702 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10703 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10704 // TODO - bigendian support once we have test coverage.
10705     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10706 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10707 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10708 !DAG.getDataLayout().isBigEndian()) {
10709 auto *LHS = dyn_cast<LoadSDNode>(N0);
10710 auto *RHS = dyn_cast<LoadSDNode>(N1);
10711 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10712 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10713 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10714 ISD::isNON_EXTLoad(LHS)) {
10715 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10716 SDLoc DL(RHS);
10717 uint64_t PtrOff =
10718 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10719 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10720 unsigned Fast = 0;
10721 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10722 RHS->getAddressSpace(), NewAlign,
10723 RHS->getMemOperand()->getFlags(), &Fast) &&
10724 Fast) {
10725 SDValue NewPtr = DAG.getMemBasePlusOffset(
10726 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10727 AddToWorklist(NewPtr.getNode());
10728 SDValue Load = DAG.getLoad(
10729 VT, DL, RHS->getChain(), NewPtr,
10730 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10731 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10732 // Replace the old load's chain with the new load's chain.
10733 WorklistRemover DeadNodes(*this);
10734 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10735 return Load;
10736 }
10737 }
10738 }
10739 }
10740 }
10741
10742 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10743 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10744   // iff we know the shift amount is in range.
10745 // TODO: when is it worth doing SUB(BW, N2) as well?
10746 if (isPowerOf2_32(BitWidth)) {
10747 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10748 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10749 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10750 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10751 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10752 }
10753
10754 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10755 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10756   // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
10757   // is legal as well, we might be better off avoiding non-constant (BW - N2).
10758 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10759 if (N0 == N1 && hasOperation(RotOpc, VT))
10760 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10761
10762 // Simplify, based on bits shifted out of N0/N1.
10763   if (SimplifyDemandedBits(SDValue(N, 0)))
10764     return SDValue(N, 0);
10765
10766 return SDValue();
10767}
10768
10769SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10770 SDValue N0 = N->getOperand(0);
10771 SDValue N1 = N->getOperand(1);
10772 if (SDValue V = DAG.simplifyShift(N0, N1))
10773 return V;
10774
10775 EVT VT = N0.getValueType();
10776
10777 // fold (*shlsat c1, c2) -> c1<<c2
10778 if (SDValue C =
10779 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
10780 return C;
10781
10782   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10783
10784 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10785 // fold (sshlsat x, c) -> (shl x, c)
10786 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10787 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10788 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10789
10790 // fold (ushlsat x, c) -> (shl x, c)
10791 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10792 N1C->getAPIntValue().ule(
10793             DAG.computeKnownBits(N0).countMinLeadingZeros()))
10794       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10795 }
10796
10797 return SDValue();
10798}
10799
10800// Given a ABS node, detect the following patterns:
10801// (ABS (SUB (EXTEND a), (EXTEND b))).
10802// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10803// Generates UABD/SABD instruction.
10804SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10805 EVT SrcVT = N->getValueType(0);
10806
10807 if (N->getOpcode() == ISD::TRUNCATE)
10808 N = N->getOperand(0).getNode();
10809
10810 if (N->getOpcode() != ISD::ABS)
10811 return SDValue();
10812
10813 EVT VT = N->getValueType(0);
10814 SDValue AbsOp1 = N->getOperand(0);
10815 SDValue Op0, Op1;
10816
10817 if (AbsOp1.getOpcode() != ISD::SUB)
10818 return SDValue();
10819
10820 Op0 = AbsOp1.getOperand(0);
10821 Op1 = AbsOp1.getOperand(1);
10822
10823 unsigned Opc0 = Op0.getOpcode();
10824
10825 // Check if the operands of the sub are (zero|sign)-extended.
10826 // TODO: Should we use ValueTracking instead?
10827 if (Opc0 != Op1.getOpcode() ||
10828 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10829 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10830 // fold (abs (sub nsw x, y)) -> abds(x, y)
10831 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10832 TLI.preferABDSToABSWithNSW(VT)) {
10833 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10834 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10835 }
10836 return SDValue();
10837 }
10838
10839 EVT VT0, VT1;
10840 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10841 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10842 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10843 } else {
10844 VT0 = Op0.getOperand(0).getValueType();
10845 VT1 = Op1.getOperand(0).getValueType();
10846 }
10847 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10848
10849 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10850 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10851 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10852 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10853 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10854 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10855 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10856 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10857 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10858 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10859 }
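// For example, with two i8 values sign-extended to i32: |sext(x) - sext(y)| is
// at most 255, so abds on the original i8 operands followed by a zero-extend
// produces the same i32 result with a narrower operation.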
10860
10861 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10862 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10863 if (hasOperation(ABDOpcode, VT)) {
10864 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10865 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10866 }
10867
10868 return SDValue();
10869}
10870
10871SDValue DAGCombiner::visitABS(SDNode *N) {
10872 SDValue N0 = N->getOperand(0);
10873 EVT VT = N->getValueType(0);
10874 SDLoc DL(N);
10875
10876 // fold (abs c1) -> c2
10877 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10878 return C;
10879 // fold (abs (abs x)) -> (abs x)
10880 if (N0.getOpcode() == ISD::ABS)
10881 return N0;
10882 // fold (abs x) -> x iff not-negative
10883 if (DAG.SignBitIsZero(N0))
10884 return N0;
10885
10886 if (SDValue ABD = foldABSToABD(N, DL))
10887 return ABD;
10888
10889 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10890 // iff zero_extend/truncate are free.
10891 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10892 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10893 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10894 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10895 hasOperation(ISD::ABS, ExtVT)) {
10896 return DAG.getNode(
10897 ISD::ZERO_EXTEND, DL, VT,
10898 DAG.getNode(ISD::ABS, DL, ExtVT,
10899 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10900 }
10901 }
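// For example, (abs (sign_extend_inreg i32 x, i8)) only has 8 significant bits,
// so on a target with a legal i8 ABS it can be computed as
// (zero_extend (abs (truncate x))) at i8; note abs(-128) yields the i8 pattern
// 0x80, which zero-extends to the expected value 128.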
10902
10903 return SDValue();
10904}
10905
10906SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10907 SDValue N0 = N->getOperand(0);
10908 EVT VT = N->getValueType(0);
10909 SDLoc DL(N);
10910
10911 // fold (bswap c1) -> c2
10912 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
10913 return C;
10914 // fold (bswap (bswap x)) -> x
10915 if (N0.getOpcode() == ISD::BSWAP)
10916 return N0.getOperand(0);
10917
10918 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
10919 // isn't supported, it will be expanded to bswap followed by a manual reversal
10920 // of bits in each byte. By placing bswaps before bitreverse, we can remove
10921 // the two bswaps if the bitreverse gets expanded.
10922 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
10923 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10924 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
10925 }
10926
10927 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
10928 // iff c >= bw/2 (i.e. the lower half of x << c is known zero)
10929 unsigned BW = VT.getScalarSizeInBits();
10930 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
10931 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10932 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
10933 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10934 ShAmt->getZExtValue() >= (BW / 2) &&
10935 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
10936 TLI.isTruncateFree(VT, HalfVT) &&
10937 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
10938 SDValue Res = N0.getOperand(0);
10939 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
10940 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
10941 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
10942 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
10943 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
10944 return DAG.getZExtOrTrunc(Res, DL, VT);
10945 }
10946 }
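// For example, on i32 with c == 24: bswap(x << 24) moves the low byte of x from
// byte 3 back to byte 0, which equals zext(bswap16(trunc(x << 8))) -- the shift
// amount is reduced by bw/2 and the swap is done at half width.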
10947
10948 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
10949 // inverse-shift-of-bswap:
10950 // bswap (X u<< C) --> (bswap X) u>> C
10951 // bswap (X u>> C) --> (bswap X) u<< C
10952 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10953 N0.hasOneUse()) {
10954 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10955 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10956 ShAmt->getZExtValue() % 8 == 0) {
10957 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10958 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
10959 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
10960 }
10961 }
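// For example, bswap(x << 16) on i32 equals (bswap x) >> 16: shifting by whole
// bytes before the swap is the same as shifting the swapped value the other way.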
10962
10963 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
10964 return V;
10965
10966 return SDValue();
10967}
10968
10969SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
10970 SDValue N0 = N->getOperand(0);
10971 EVT VT = N->getValueType(0);
10972 SDLoc DL(N);
10973
10974 // fold (bitreverse c1) -> c2
10975 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
10976 return C;
10977 // fold (bitreverse (bitreverse x)) -> x
10978 if (N0.getOpcode() == ISD::BITREVERSE)
10979 return N0.getOperand(0);
10980 return SDValue();
10981}
10982
10983SDValue DAGCombiner::visitCTLZ(SDNode *N) {
10984 SDValue N0 = N->getOperand(0);
10985 EVT VT = N->getValueType(0);
10986 SDLoc DL(N);
10987
10988 // fold (ctlz c1) -> c2
10989 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
10990 return C;
10991
10992 // If the value is known never to be zero, switch to the undef version.
10993 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
10994 if (DAG.isKnownNeverZero(N0))
10995 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
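// For example, ctlz(x | 1) can use the zero-undef form because the operand is
// provably non-zero, avoiding the zero check some targets emit when expanding
// plain CTLZ.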
10996
10997 return SDValue();
10998}
10999
11000SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11001 SDValue N0 = N->getOperand(0);
11002 EVT VT = N->getValueType(0);
11003 SDLoc DL(N);
11004
11005 // fold (ctlz_zero_undef c1) -> c2
11006 if (SDValue C =
11007 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11008 return C;
11009 return SDValue();
11010}
11011
11012SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11013 SDValue N0 = N->getOperand(0);
11014 EVT VT = N->getValueType(0);
11015 SDLoc DL(N);
11016
11017 // fold (cttz c1) -> c2
11018 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11019 return C;
11020
11021 // If the value is known never to be zero, switch to the undef version.
11022 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11023 if (DAG.isKnownNeverZero(N0))
11024 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11025
11026 return SDValue();
11027}
11028
11029SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11030 SDValue N0 = N->getOperand(0);
11031 EVT VT = N->getValueType(0);
11032 SDLoc DL(N);
11033
11034 // fold (cttz_zero_undef c1) -> c2
11035 if (SDValue C =
11036 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11037 return C;
11038 return SDValue();
11039}
11040
11041SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11042 SDValue N0 = N->getOperand(0);
11043 EVT VT = N->getValueType(0);
11044 unsigned NumBits = VT.getScalarSizeInBits();
11045 SDLoc DL(N);
11046
11047 // fold (ctpop c1) -> c2
11048 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11049 return C;
11050
11051 // If the source is being shifted, but doesn't affect any active bits,
11052 // then we can call CTPOP on the shift source directly.
11053 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11054 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11055 const APInt &Amt = AmtC->getAPIntValue();
11056 if (Amt.ult(NumBits)) {
11057 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11058 if ((N0.getOpcode() == ISD::SRL &&
11059 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11060 (N0.getOpcode() == ISD::SHL &&
11061 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11062 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11063 }
11064 }
11065 }
11066 }
11067
11068 // If the upper bits are known to be zero, then see if it's profitable to
11069 // only count the lower bits.
11070 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11071 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11072 if (hasOperation(ISD::CTPOP, HalfVT) &&
11073 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11074 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11075 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11076 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11077 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11078 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11079 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11080 }
11081 }
11082 }
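// For example, a ctpop of an i64 whose upper 32 bits are known zero can be done
// as a 32-bit ctpop of the truncated value followed by a zero-extend.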
11083
11084 return SDValue();
11085}
11086
11087// FIXME: This should be checking for no signed zeros on individual operands, as
11088// well as no nans.
11089 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11090 SDValue RHS,
11091 const TargetLowering &TLI) {
11092 const TargetOptions &Options = DAG.getTarget().Options;
11093 EVT VT = LHS.getValueType();
11094
11095 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11096 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11097 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
11098 }
11099
11100 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11101 SDValue RHS, SDValue True, SDValue False,
11102 ISD::CondCode CC,
11103 const TargetLowering &TLI,
11104 SelectionDAG &DAG) {
11105 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11106 switch (CC) {
11107 case ISD::SETOLT:
11108 case ISD::SETOLE:
11109 case ISD::SETLT:
11110 case ISD::SETLE:
11111 case ISD::SETULT:
11112 case ISD::SETULE: {
11113 // Since the operands are known never to be NaN if we get here, either fminnum
11114 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11115 // expanded in terms of it.
11116 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11117 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11118 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11119
11120 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11121 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11122 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11123 return SDValue();
11124 }
11125 case ISD::SETOGT:
11126 case ISD::SETOGE:
11127 case ISD::SETGT:
11128 case ISD::SETGE:
11129 case ISD::SETUGT:
11130 case ISD::SETUGE: {
11131 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11132 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11133 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11134
11135 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11136 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11137 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11138 return SDValue();
11139 }
11140 default:
11141 return SDValue();
11142 }
11143}
11144
11145/// Generate Min/Max node
11146SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11147 SDValue RHS, SDValue True,
11148 SDValue False, ISD::CondCode CC) {
11149 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11150 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11151
11152 // If we can't directly match this, try to see if we can pull an fneg out of
11153 // the select.
11154 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11155 True, DAG, LegalOperations, ForCodeSize);
11156 if (!NegTrue)
11157 return SDValue();
11158
11159 HandleSDNode NegTrueHandle(NegTrue);
11160
11161 // Try to unfold an fneg from the select if we are comparing the negated
11162 // constant.
11163 //
11164 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11165 //
11166 // TODO: Handle fabs
11167 if (LHS == NegTrue) {
11168 // If we can't directly match this, try to see if we can pull an fneg out of
11169 // the select.
11170 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11171 RHS, DAG, LegalOperations, ForCodeSize);
11172 if (NegRHS) {
11173 HandleSDNode NegRHSHandle(NegRHS);
11174 if (NegRHS == False) {
11175 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11176 False, CC, TLI, DAG);
11177 if (Combined)
11178 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11179 }
11180 }
11181 }
11182
11183 return SDValue();
11184}
11185
11186/// If a (v)select has a condition value that is a sign-bit test, try to smear
11187/// the condition operand sign-bit across the value width and use it as a mask.
11188 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
11189 SDValue Cond = N->getOperand(0);
11190 SDValue C1 = N->getOperand(1);
11191 SDValue C2 = N->getOperand(2);
11192 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11193 return SDValue();
11194
11195 EVT VT = N->getValueType(0);
11196 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11197 VT != Cond.getOperand(0).getValueType())
11198 return SDValue();
11199
11200 // The inverted-condition + commuted-select variants of these patterns are
11201 // canonicalized to these forms in IR.
11202 SDValue X = Cond.getOperand(0);
11203 SDValue CondC = Cond.getOperand(1);
11204 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11205 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11206 isAllOnesOrAllOnesSplat(C2)) {
11207 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11208 SDLoc DL(N);
11209 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11210 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11211 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11212 }
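// For example, on i32: (X s> -1) ? C1 : -1 becomes (X >>s 31) | C1, since the
// arithmetic shift yields 0 for non-negative X (selecting C1) and all-ones for
// negative X (selecting -1).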
11213 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11214 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11215 SDLoc DL(N);
11216 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11217 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11218 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11219 }
11220 return SDValue();
11221}
11222
11223 static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
11224 const TargetLowering &TLI) {
11225 if (!TLI.convertSelectOfConstantsToMath(VT))
11226 return false;
11227
11228 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11229 return true;
11230 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11231 return true;
11232
11233 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11234 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11235 return true;
11236 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11237 return true;
11238
11239 return false;
11240}
11241
11242SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11243 SDValue Cond = N->getOperand(0);
11244 SDValue N1 = N->getOperand(1);
11245 SDValue N2 = N->getOperand(2);
11246 EVT VT = N->getValueType(0);
11247 EVT CondVT = Cond.getValueType();
11248 SDLoc DL(N);
11249
11250 if (!VT.isInteger())
11251 return SDValue();
11252
11253 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11254 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11255 if (!C1 || !C2)
11256 return SDValue();
11257
11258 if (CondVT != MVT::i1 || LegalOperations) {
11259 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11260 // We can't do this reliably if integer-based booleans have different contents
11261 // from floating-point-based booleans. This is because we can't tell whether we
11262 // have an integer-based boolean or a floating-point-based boolean unless we
11263 // can find the SETCC that produced it and inspect its operands. This is
11264 // fairly easy if C is the SETCC node, but it can potentially be
11265 // undiscoverable (or not reasonably discoverable). For example, it could be
11266 // in another basic block or it could require searching a complicated
11267 // expression.
11268 if (CondVT.isInteger() &&
11269 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11270 TargetLowering::ZeroOrOneBooleanContent &&
11271 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11272 TargetLowering::ZeroOrOneBooleanContent &&
11273 C1->isZero() && C2->isOne()) {
11274 SDValue NotCond =
11275 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11276 if (VT.bitsEq(CondVT))
11277 return NotCond;
11278 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11279 }
11280
11281 return SDValue();
11282 }
11283
11284 // Only do this before legalization to avoid conflicting with target-specific
11285 // transforms in the other direction (create a select from a zext/sext). There
11286 // is also a target-independent combine here in DAGCombiner in the other
11287 // direction for (select Cond, -1, 0) when the condition is not i1.
11288 assert(CondVT == MVT::i1 && !LegalOperations);
11289
11290 // select Cond, 1, 0 --> zext (Cond)
11291 if (C1->isOne() && C2->isZero())
11292 return DAG.getZExtOrTrunc(Cond, DL, VT);
11293
11294 // select Cond, -1, 0 --> sext (Cond)
11295 if (C1->isAllOnes() && C2->isZero())
11296 return DAG.getSExtOrTrunc(Cond, DL, VT);
11297
11298 // select Cond, 0, 1 --> zext (!Cond)
11299 if (C1->isZero() && C2->isOne()) {
11300 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11301 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11302 return NotCond;
11303 }
11304
11305 // select Cond, 0, -1 --> sext (!Cond)
11306 if (C1->isZero() && C2->isAllOnes()) {
11307 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11308 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11309 return NotCond;
11310 }
11311
11312 // Use a target hook because some targets may prefer to transform in the
11313 // other direction.
11314 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11315 return SDValue();
11316
11317 // For any constants that differ by 1, we can transform the select into
11318 // an extend and add.
11319 const APInt &C1Val = C1->getAPIntValue();
11320 const APInt &C2Val = C2->getAPIntValue();
11321
11322 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11323 if (C1Val - 1 == C2Val) {
11324 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11325 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11326 }
11327
11328 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11329 if (C1Val + 1 == C2Val) {
11330 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11331 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11332 }
11333
11334 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11335 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11336 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11337 SDValue ShAmtC =
11338 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11339 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11340 }
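// For example, select Cond, 8, 0 becomes (zext Cond) << 3, trading the select
// for a shift of the 0/1 condition value.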
11341
11342 // select Cond, -1, C --> or (sext Cond), C
11343 if (C1->isAllOnes()) {
11344 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11345 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11346 }
11347
11348 // select Cond, C, -1 --> or (sext (not Cond)), C
11349 if (C2->isAllOnes()) {
11350 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11351 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11352 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11353 }
11354
11355 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
11356 return V;
11357
11358 return SDValue();
11359}
11360
11361template <class MatchContextClass>
11362 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11363 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11364 N->getOpcode() == ISD::VP_SELECT) &&
11365 "Expected a (v)(vp.)select");
11366 SDValue Cond = N->getOperand(0);
11367 SDValue T = N->getOperand(1), F = N->getOperand(2);
11368 EVT VT = N->getValueType(0);
11369 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11370 MatchContextClass matcher(DAG, TLI, N);
11371
11372 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11373 return SDValue();
11374
11375 // select Cond, Cond, F --> or Cond, F
11376 // select Cond, 1, F --> or Cond, F
11377 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11378 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11379
11380 // select Cond, T, Cond --> and Cond, T
11381 // select Cond, T, 0 --> and Cond, T
11382 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11383 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11384
11385 // select Cond, T, 1 --> or (not Cond), T
11386 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11387 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11388 DAG.getAllOnesConstant(SDLoc(N), VT));
11389 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11390 }
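// For example, with i1 operands: select C, T, 1 must produce 1 when C is 0 and
// T when C is 1, which is exactly (not C) | T.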
11391
11392 // select Cond, 0, F --> and (not Cond), F
11393 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11394 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11395 DAG.getAllOnesConstant(SDLoc(N), VT));
11396 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11397 }
11398
11399 return SDValue();
11400}
11401
11402 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11403 SDValue N0 = N->getOperand(0);
11404 SDValue N1 = N->getOperand(1);
11405 SDValue N2 = N->getOperand(2);
11406 EVT VT = N->getValueType(0);
11407 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11408 return SDValue();
11409
11410 SDValue Cond0 = N0.getOperand(0);
11411 SDValue Cond1 = N0.getOperand(1);
11412 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11413 if (VT != Cond0.getValueType())
11414 return SDValue();
11415
11416 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11417 // compare is inverted from that pattern ("Cond0 s> -1").
11418 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11419 ; // This is the pattern we are looking for.
11420 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11421 std::swap(N1, N2);
11422 else
11423 return SDValue();
11424
11425 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11426 if (isNullOrNullSplat(N2)) {
11427 SDLoc DL(N);
11428 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11429 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11430 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11431 }
11432
11433 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11434 if (isAllOnesOrAllOnesSplat(N1)) {
11435 SDLoc DL(N);
11436 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11437 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11438 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11439 }
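// For example, (X s< 0) ? -1 : Y becomes (X >>s (BW-1)) | Y: the arithmetic
// shift splats the sign bit, giving all-ones for negative X and zero otherwise.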
11440
11441 // If we have to invert the sign bit mask, only do that transform if the
11442 // target has a bitwise 'and not' instruction (the invert is free).
11443 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11444 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11445 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11446 SDLoc DL(N);
11447 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11448 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11449 SDValue Not = DAG.getNOT(DL, Sra, VT);
11450 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11451 }
11452
11453 // TODO: There's another pattern in this family, but it may require
11454 // implementing hasOrNot() to check for profitability:
11455 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11456
11457 return SDValue();
11458}
11459
11460SDValue DAGCombiner::visitSELECT(SDNode *N) {
11461 SDValue N0 = N->getOperand(0);
11462 SDValue N1 = N->getOperand(1);
11463 SDValue N2 = N->getOperand(2);
11464 EVT VT = N->getValueType(0);
11465 EVT VT0 = N0.getValueType();
11466 SDLoc DL(N);
11467 SDNodeFlags Flags = N->getFlags();
11468
11469 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11470 return V;
11471
11472 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11473 return V;
11474
11475 // select (not Cond), N1, N2 -> select Cond, N2, N1
11476 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11477 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11478 SelectOp->setFlags(Flags);
11479 return SelectOp;
11480 }
11481
11482 if (SDValue V = foldSelectOfConstants(N))
11483 return V;
11484
11485 // If we can fold this based on the true/false value, do so.
11486 if (SimplifySelectOps(N, N1, N2))
11487 return SDValue(N, 0); // Don't revisit N.
11488
11489 if (VT0 == MVT::i1) {
11490 // The code in this block deals with the following 2 equivalences:
11491 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11492 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11493 // The target can specify its preferred form with the
11494 // shouldNormalizeToSelectSequence() callback. However, we always transform
11495 // to the right if we find the inner select already exists in the DAG, and we
11496 // always transform to the left side if we know that we can further
11497 // optimize the combination of the conditions.
11498 bool normalizeToSequence =
11499 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11500 // select (and Cond0, Cond1), X, Y
11501 // -> select Cond0, (select Cond1, X, Y), Y
11502 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11503 SDValue Cond0 = N0->getOperand(0);
11504 SDValue Cond1 = N0->getOperand(1);
11505 SDValue InnerSelect =
11506 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11507 if (normalizeToSequence || !InnerSelect.use_empty())
11508 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11509 InnerSelect, N2, Flags);
11510 // Cleanup on failure.
11511 if (InnerSelect.use_empty())
11512 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11513 }
11514 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11515 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11516 SDValue Cond0 = N0->getOperand(0);
11517 SDValue Cond1 = N0->getOperand(1);
11518 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11519 Cond1, N1, N2, Flags);
11520 if (normalizeToSequence || !InnerSelect.use_empty())
11521 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11522 InnerSelect, Flags);
11523 // Cleanup on failure.
11524 if (InnerSelect.use_empty())
11525 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11526 }
11527
11528 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11529 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11530 SDValue N1_0 = N1->getOperand(0);
11531 SDValue N1_1 = N1->getOperand(1);
11532 SDValue N1_2 = N1->getOperand(2);
11533 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11534 // Create the actual and node if we can generate good code for it.
11535 if (!normalizeToSequence) {
11536 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11537 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11538 N2, Flags);
11539 }
11540 // Otherwise see if we can optimize the "and" to a better pattern.
11541 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11542 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11543 N2, Flags);
11544 }
11545 }
11546 }
11547 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11548 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11549 SDValue N2_0 = N2->getOperand(0);
11550 SDValue N2_1 = N2->getOperand(1);
11551 SDValue N2_2 = N2->getOperand(2);
11552 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11553 // Create the actual or node if we can generate good code for it.
11554 if (!normalizeToSequence) {
11555 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11556 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11557 N2_2, Flags);
11558 }
11559 // Otherwise see if we can optimize to a better pattern.
11560 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11561 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11562 N2_2, Flags);
11563 }
11564 }
11565 }
11566
11567 // Fold selects based on a setcc into other things, such as min/max/abs.
11568 if (N0.getOpcode() == ISD::SETCC) {
11569 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11570 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11571
11572 // select (fcmp lt x, y), x, y -> fminnum x, y
11573 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11574 //
11575 // This is OK if we don't care what happens if either operand is a NaN.
11576 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11577 if (SDValue FMinMax =
11578 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11579 return FMinMax;
11580
11581 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11582 // This is conservatively limited to pre-legal-operations to give targets
11583 // a chance to reverse the transform if they want to do that. Also, it is
11584 // unlikely that the pattern would be formed late, so it's probably not
11585 // worth going through the other checks.
11586 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11587 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11588 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11589 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11590 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11591 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11592 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11593 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11594 //
11595 // The IR equivalent of this transform would have this form:
11596 // %a = add %x, C
11597 // %c = icmp ugt %x, ~C
11598 // %r = select %c, -1, %a
11599 // =>
11600 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11601 // %u0 = extractvalue %u, 0
11602 // %u1 = extractvalue %u, 1
11603 // %r = select %u1, -1, %u0
11604 SDVTList VTs = DAG.getVTList(VT, VT0);
11605 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11606 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11607 }
11608 }
11609
11610 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11611 (!LegalOperations &&
11612 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11613 // Any flags available in a select/setcc fold will be on the setcc as they
11614 // migrated from fcmp
11615 Flags = N0->getFlags();
11616 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11617 N2, N0.getOperand(2));
11618 SelectNode->setFlags(Flags);
11619 return SelectNode;
11620 }
11621
11622 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11623 return NewSel;
11624 }
11625
11626 if (!VT.isVector())
11627 if (SDValue BinOp = foldSelectOfBinops(N))
11628 return BinOp;
11629
11630 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11631 return R;
11632
11633 return SDValue();
11634}
11635
11636// This function assumes all the vselect's arguments are CONCAT_VECTOR
11637// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11638 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11639 SDLoc DL(N);
11640 SDValue Cond = N->getOperand(0);
11641 SDValue LHS = N->getOperand(1);
11642 SDValue RHS = N->getOperand(2);
11643 EVT VT = N->getValueType(0);
11644 int NumElems = VT.getVectorNumElements();
11645 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11646 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11647 Cond.getOpcode() == ISD::BUILD_VECTOR);
11648
11649 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
11650 // binary ones here.
11651 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11652 return SDValue();
11653
11654 // We're sure we have an even number of elements due to the
11655 // concat_vectors we have as arguments to vselect.
11656 // Skip BV elements until we find one that's not an UNDEF
11657 // After we find an UNDEF element, keep looping until we get to half the
11658 // length of the BV and see if all the non-undef nodes are the same.
11659 ConstantSDNode *BottomHalf = nullptr;
11660 for (int i = 0; i < NumElems / 2; ++i) {
11661 if (Cond->getOperand(i)->isUndef())
11662 continue;
11663
11664 if (BottomHalf == nullptr)
11665 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11666 else if (Cond->getOperand(i).getNode() != BottomHalf)
11667 return SDValue();
11668 }
11669
11670 // Do the same for the second half of the BuildVector
11671 ConstantSDNode *TopHalf = nullptr;
11672 for (int i = NumElems / 2; i < NumElems; ++i) {
11673 if (Cond->getOperand(i)->isUndef())
11674 continue;
11675
11676 if (TopHalf == nullptr)
11677 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11678 else if (Cond->getOperand(i).getNode() != TopHalf)
11679 return SDValue();
11680 }
11681
11682 assert(TopHalf && BottomHalf &&
11683 "One half of the selector was all UNDEFs and the other was all the "
11684 "same value. This should have been addressed before this function.");
11685 return DAG.getNode(
11686 ISD::CONCAT_VECTORS, DL, VT,
11687 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11688 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11689}
11690
11691bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11692 SelectionDAG &DAG, const SDLoc &DL) {
11693
11694 // Only perform the transformation when existing operands can be reused.
11695 if (IndexIsScaled)
11696 return false;
11697
11698 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11699 return false;
11700
11701 EVT VT = BasePtr.getValueType();
11702
11703 if (SDValue SplatVal = DAG.getSplatValue(Index);
11704 SplatVal && !isNullConstant(SplatVal) &&
11705 SplatVal.getValueType() == VT) {
11706 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11707 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11708 return true;
11709 }
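// For example, a gather/scatter whose index is splat(C) can fold C into the
// scalar base pointer and use an all-zero index; the cases below do the same
// when the index is splat(C) + V, keeping V as the remaining per-lane index.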
11710
11711 if (Index.getOpcode() != ISD::ADD)
11712 return false;
11713
11714 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11715 SplatVal && SplatVal.getValueType() == VT) {
11716 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11717 Index = Index.getOperand(1);
11718 return true;
11719 }
11720 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11721 SplatVal && SplatVal.getValueType() == VT) {
11722 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11723 Index = Index.getOperand(0);
11724 return true;
11725 }
11726 return false;
11727}
11728
11729// Fold sext/zext of index into index type.
11730 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11731 SelectionDAG &DAG) {
11732 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11733
11734 // It's always safe to look through zero extends.
11735 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11736 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11737 IndexType = ISD::UNSIGNED_SCALED;
11738 Index = Index.getOperand(0);
11739 return true;
11740 }
11741 if (ISD::isIndexTypeSigned(IndexType)) {
11742 IndexType = ISD::UNSIGNED_SCALED;
11743 return true;
11744 }
11745 }
11746
11747 // It's only safe to look through sign extends when Index is signed.
11748 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11749 ISD::isIndexTypeSigned(IndexType) &&
11750 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11751 Index = Index.getOperand(0);
11752 return true;
11753 }
11754
11755 return false;
11756}
11757
11758SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11759 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11760 SDValue Mask = MSC->getMask();
11761 SDValue Chain = MSC->getChain();
11762 SDValue Index = MSC->getIndex();
11763 SDValue Scale = MSC->getScale();
11764 SDValue StoreVal = MSC->getValue();
11765 SDValue BasePtr = MSC->getBasePtr();
11766 SDValue VL = MSC->getVectorLength();
11767 ISD::MemIndexType IndexType = MSC->getIndexType();
11768 SDLoc DL(N);
11769
11770 // Zap scatters with a zero mask.
11771 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11772 return Chain;
11773
11774 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11775 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11776 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11777 DL, Ops, MSC->getMemOperand(), IndexType);
11778 }
11779
11780 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11781 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11782 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11783 DL, Ops, MSC->getMemOperand(), IndexType);
11784 }
11785
11786 return SDValue();
11787}
11788
11789SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11790 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11791 SDValue Mask = MSC->getMask();
11792 SDValue Chain = MSC->getChain();
11793 SDValue Index = MSC->getIndex();
11794 SDValue Scale = MSC->getScale();
11795 SDValue StoreVal = MSC->getValue();
11796 SDValue BasePtr = MSC->getBasePtr();
11797 ISD::MemIndexType IndexType = MSC->getIndexType();
11798 SDLoc DL(N);
11799
11800 // Zap scatters with a zero mask.
11801 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11802 return Chain;
11803
11804 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11805 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11806 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11807 DL, Ops, MSC->getMemOperand(), IndexType,
11808 MSC->isTruncatingStore());
11809 }
11810
11811 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11812 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11813 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11814 DL, Ops, MSC->getMemOperand(), IndexType,
11815 MSC->isTruncatingStore());
11816 }
11817
11818 return SDValue();
11819}
11820
11821SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11822 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11823 SDValue Mask = MST->getMask();
11824 SDValue Chain = MST->getChain();
11825 SDValue Value = MST->getValue();
11826 SDValue Ptr = MST->getBasePtr();
11827 SDLoc DL(N);
11828
11829 // Zap masked stores with a zero mask.
11830 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11831 return Chain;
11832
11833 // Remove a masked store if base pointers and masks are equal.
11834 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11835 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11836 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11837 !MST->getBasePtr().isUndef() &&
11838 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11839 MST1->getMemoryVT().getStoreSize()) ||
11840 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11841 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11842 MST->getMemoryVT().getStoreSize())) {
11843 CombineTo(MST1, MST1->getChain());
11844 if (N->getOpcode() != ISD::DELETED_NODE)
11845 AddToWorklist(N);
11846 return SDValue(N, 0);
11847 }
11848 }
11849
11850 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11851 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11852 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11853 !MST->isCompressingStore() && !MST->isTruncatingStore())
11854 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11855 MST->getBasePtr(), MST->getPointerInfo(),
11856 MST->getOriginalAlign(),
11857 MST->getMemOperand()->getFlags(), MST->getAAInfo());
11858
11859 // Try transforming N to an indexed store.
11860 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11861 return SDValue(N, 0);
11862
11863 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11864 Value.getValueType().isInteger() &&
11865 (!isa<ConstantSDNode>(Value) ||
11866 !cast<ConstantSDNode>(Value)->isOpaque())) {
11867 APInt TruncDemandedBits =
11868 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11869 MST->getMemoryVT().getScalarSizeInBits());
11870
11871 // See if we can simplify the operation with
11872 // SimplifyDemandedBits, which only works if the value has a single use.
11873 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11874 // Re-visit the store if anything changed and the store hasn't been merged
11875 // with another node (N is deleted) SimplifyDemandedBits will add Value's
11876 // node back to the worklist if necessary, but we also need to re-visit
11877 // the Store node itself.
11878 if (N->getOpcode() != ISD::DELETED_NODE)
11879 AddToWorklist(N);
11880 return SDValue(N, 0);
11881 }
11882 }
11883
11884 // If this is a TRUNC followed by a masked store, fold this into a masked
11885 // truncating store. We can do this even if this is already a masked
11886 // truncstore.
11887 // TODO: Try combining to a masked compress store if possible.
11888 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11889 MST->isUnindexed() && !MST->isCompressingStore() &&
11890 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11891 MST->getMemoryVT(), LegalOperations)) {
11892 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11893 Value.getOperand(0).getValueType());
11894 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11895 MST->getOffset(), Mask, MST->getMemoryVT(),
11896 MST->getMemOperand(), MST->getAddressingMode(),
11897 /*IsTruncating=*/true);
11898 }
11899
11900 return SDValue();
11901}
11902
11903SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11904 auto *SST = cast<VPStridedStoreSDNode>(N);
11905 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11906 // Combine strided stores with unit-stride to a regular VP store.
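// For example, a strided store of i32 elements with a constant stride of 4
// bytes writes consecutive elements, so it is equivalent to a unit-stride
// (regular) VP store.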
11907 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11908 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11909 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11910 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11911 SST->getVectorLength(), SST->getMemoryVT(),
11912 SST->getMemOperand(), SST->getAddressingMode(),
11913 SST->isTruncatingStore(), SST->isCompressingStore());
11914 }
11915 return SDValue();
11916}
11917
11918SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11919 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11920 SDValue Mask = MGT->getMask();
11921 SDValue Chain = MGT->getChain();
11922 SDValue Index = MGT->getIndex();
11923 SDValue Scale = MGT->getScale();
11924 SDValue BasePtr = MGT->getBasePtr();
11925 SDValue VL = MGT->getVectorLength();
11926 ISD::MemIndexType IndexType = MGT->getIndexType();
11927 SDLoc DL(N);
11928
11929 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11930 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11931 return DAG.getGatherVP(
11932 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11933 Ops, MGT->getMemOperand(), IndexType);
11934 }
11935
11936 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11937 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11938 return DAG.getGatherVP(
11939 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11940 Ops, MGT->getMemOperand(), IndexType);
11941 }
11942
11943 return SDValue();
11944}
11945
11946SDValue DAGCombiner::visitMGATHER(SDNode *N) {
11947 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
11948 SDValue Mask = MGT->getMask();
11949 SDValue Chain = MGT->getChain();
11950 SDValue Index = MGT->getIndex();
11951 SDValue Scale = MGT->getScale();
11952 SDValue PassThru = MGT->getPassThru();
11953 SDValue BasePtr = MGT->getBasePtr();
11954 ISD::MemIndexType IndexType = MGT->getIndexType();
11955 SDLoc DL(N);
11956
11957 // Zap gathers with a zero mask.
11958 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11959 return CombineTo(N, PassThru, MGT->getChain());
11960
11961 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11962 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11963 return DAG.getMaskedGather(
11964 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11965 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11966 }
11967
11968 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11969 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11970 return DAG.getMaskedGather(
11971 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11972 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11973 }
11974
11975 return SDValue();
11976}
11977
11978SDValue DAGCombiner::visitMLOAD(SDNode *N) {
11979 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
11980 SDValue Mask = MLD->getMask();
11981 SDLoc DL(N);
11982
11983 // Zap masked loads with a zero mask.
11984 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11985 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
11986
11987 // If this is a masked load with an all-ones mask, we can use an unmasked load.
11988 // FIXME: Can we do this for indexed, expanding, or extending loads?
11989 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
11990 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
11991 SDValue NewLd = DAG.getLoad(
11992 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
11993 MLD->getPointerInfo(), MLD->getOriginalAlign(),
11994 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
11995 return CombineTo(N, NewLd, NewLd.getValue(1));
11996 }
11997
11998 // Try transforming N to an indexed load.
11999 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12000 return SDValue(N, 0);
12001
12002 return SDValue();
12003}
12004
12005SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12006 auto *SLD = cast<VPStridedLoadSDNode>(N);
12007 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12008 // Combine strided loads with unit-stride to a regular VP load.
12009 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12010 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12011 SDValue NewLd = DAG.getLoadVP(
12012 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12013 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12014 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12015 SLD->getMemOperand(), SLD->isExpandingLoad());
12016 return CombineTo(N, NewLd, NewLd.getValue(1));
12017 }
12018 return SDValue();
12019}
12020
12021/// A vector select of 2 constant vectors can be simplified to math/logic to
12022/// avoid a variable select instruction and possibly avoid constant loads.
12023SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12024 SDValue Cond = N->getOperand(0);
12025 SDValue N1 = N->getOperand(1);
12026 SDValue N2 = N->getOperand(2);
12027 EVT VT = N->getValueType(0);
12028 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12029 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12030 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12031 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12032 return SDValue();
12033
12034 // Check if we can use the condition value to increment/decrement a single
12035 // constant value. This simplifies a select to an add and removes a constant
12036 // load/materialization from the general case.
12037 bool AllAddOne = true;
12038 bool AllSubOne = true;
12039 unsigned Elts = VT.getVectorNumElements();
12040 for (unsigned i = 0; i != Elts; ++i) {
12041 SDValue N1Elt = N1.getOperand(i);
12042 SDValue N2Elt = N2.getOperand(i);
12043 if (N1Elt.isUndef() || N2Elt.isUndef())
12044 continue;
12045 if (N1Elt.getValueType() != N2Elt.getValueType())
12046 continue;
12047
12048 const APInt &C1 = N1Elt->getAsAPIntVal();
12049 const APInt &C2 = N2Elt->getAsAPIntVal();
12050 if (C1 != C2 + 1)
12051 AllAddOne = false;
12052 if (C1 != C2 - 1)
12053 AllSubOne = false;
12054 }
12055
12056 // Further simplifications for the extra-special cases where the constants are
12057 // all 0 or all -1 should be implemented as folds of these patterns.
12058 SDLoc DL(N);
12059 if (AllAddOne || AllSubOne) {
12060 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12061 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12062 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12063 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12064 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12065 }
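// For example, vselect Cond, <3,3,3,3>, <2,2,2,2> becomes add (zext Cond),
// <2,2,2,2>: true lanes get 2+1 and false lanes get 2+0.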
12066
12067 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12068 APInt Pow2C;
12069 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12070 isNullOrNullSplat(N2)) {
12071 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12072 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12073 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12074 }
12075
12076 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
12077 return V;
12078
12079 // The general case for select-of-constants:
12080 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12081 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12082 // leave that to a machine-specific pass.
12083 return SDValue();
12084}
12085
12086SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12087 SDValue N0 = N->getOperand(0);
12088 SDValue N1 = N->getOperand(1);
12089 SDValue N2 = N->getOperand(2);
12090
12091 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12092 return V;
12093
12094 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12095 return V;
12096
12097 return SDValue();
12098}
12099
12100SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12101 SDValue N0 = N->getOperand(0);
12102 SDValue N1 = N->getOperand(1);
12103 SDValue N2 = N->getOperand(2);
12104 EVT VT = N->getValueType(0);
12105 SDLoc DL(N);
12106
12107 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12108 return V;
12109
12110 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12111 return V;
12112
12113 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12114 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12115 return DAG.getSelect(DL, VT, F, N2, N1);
12116
12117 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12118 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12119 N2.hasOneUse() && isConstantOrConstantVector(N1.getOperand(1)) &&
12120 N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12121 TLI.getBooleanContents(N0.getValueType()) ==
12122 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12123 return DAG.getNode(
12124 ISD::ADD, DL, N1.getValueType(), N2,
12125 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12126 }
12127
12128 // Canonicalize integer abs.
12129 // vselect (setg[te] X, 0), X, -X ->
12130 // vselect (setgt X, -1), X, -X ->
12131 // vselect (setl[te] X, 0), -X, X ->
12132 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12133 if (N0.getOpcode() == ISD::SETCC) {
12134 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12135 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12136 bool isAbs = false;
12137 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12138
12139 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12140 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12141 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12142 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12143 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12144 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12145 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12146
12147 if (isAbs) {
12148 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12149 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12150
12151 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12152 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12153 DL, getShiftAmountTy(VT)));
12154 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12155 AddToWorklist(Shift.getNode());
12156 AddToWorklist(Add.getNode());
12157 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12158 }
12159
12160 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12161 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12162 //
12163 // This is OK if we don't care about what happens if either operand is a
12164 // NaN.
12165 //
12166 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12167 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12168 return FMinMax;
12169 }
12170
12171 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12172 return S;
12173 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12174 return S;
12175
12176 // If this select has a condition (setcc) with narrower operands than the
12177 // select, try to widen the compare to match the select width.
12178 // TODO: This should be extended to handle any constant.
12179 // TODO: This could be extended to handle non-loading patterns, but that
12180 // requires thorough testing to avoid regressions.
12181 if (isNullOrNullSplat(RHS)) {
12182 EVT NarrowVT = LHS.getValueType();
12183 EVT WideVT = N->getValueType(0).changeVectorElementTypeToInteger();
12184 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12185 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12186 unsigned WideWidth = WideVT.getScalarSizeInBits();
12187 bool IsSigned = isSignedIntSetCC(CC);
12188 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12189 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12190 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12191 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12192 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12193 // Both compare operands can be widened for free. The LHS can use an
12194 // extended load, and the RHS is a constant:
12195 // vselect (ext (setcc load(X), C)), N1, N2 -->
12196 // vselect (setcc extload(X), C'), N1, N2
12197 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12198 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12199 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12200 EVT WideSetCCVT = getSetCCResultType(WideVT);
12201 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12202 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12203 }
12204 }
12205
12206 // Match VSELECTs with absolute difference patterns.
12207 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12208 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12209 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12210 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12211 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12212 N1.getOperand(0) == N2.getOperand(1) &&
12213 N1.getOperand(1) == N2.getOperand(0)) {
12214 bool IsSigned = isSignedIntSetCC(CC);
12215 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12216 if (hasOperation(ABDOpc, VT)) {
12217 switch (CC) {
12218 case ISD::SETGT:
12219 case ISD::SETGE:
12220 case ISD::SETUGT:
12221 case ISD::SETUGE:
12222 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12223 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12224 break;
12225 case ISD::SETLT:
12226 case ISD::SETLE:
12227 case ISD::SETULT:
12228 case ISD::SETULE:
12229 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1) )
12230 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12231 break;
12232 default:
12233 break;
12234 }
12235 }
12236 }
12237
12238 // Match VSELECTs into add with unsigned saturation.
12239 if (hasOperation(ISD::UADDSAT, VT)) {
12240 // Check if one of the arms of the VSELECT is a vector with all bits set.
12241 // If it's on the left side, invert the predicate to simplify the logic below.
12242 SDValue Other;
12243 ISD::CondCode SatCC = CC;
12244 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12245 Other = N2;
12246 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12247 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12248 Other = N1;
12249 }
12250
12251 if (Other && Other.getOpcode() == ISD::ADD) {
12252 SDValue CondLHS = LHS, CondRHS = RHS;
12253 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12254
12255 // Canonicalize condition operands.
12256 if (SatCC == ISD::SETUGE) {
12257 std::swap(CondLHS, CondRHS);
12258 SatCC = ISD::SETULE;
12259 }
12260
12261 // We can test against either of the addition operands.
12262 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12263 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12264 if (SatCC == ISD::SETULE && Other == CondRHS &&
12265 (OpLHS == CondLHS || OpRHS == CondLHS))
12266 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
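// For example, on i8 lanes: x u<= x+y ? x+y : 255 clamps the add at 255, which
// is exactly uaddsat(x, y); the unsigned compare detects the wrap.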
12267
12268 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12269 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12270 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12271 CondLHS == OpLHS) {
12272 // If the RHS is a constant we have to reverse the const
12273 // canonicalization.
12274 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12275 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12276 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12277 };
12278 if (SatCC == ISD::SETULE &&
12279 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12280 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12281 }
12282 }
12283 }
12284
12285 // Match VSELECTs into sub with unsigned saturation.
12286 if (hasOperation(ISD::USUBSAT, VT)) {
12287 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12288 // the left side invert the predicate to simplify logic below.
12289 SDValue Other;
12290 ISD::CondCode SatCC = CC;
12291 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12292 Other = N2;
12293 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12294 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12295 Other = N1;
12296 }
12297
12298 // zext(x) >= y ? trunc(zext(x) - y) : 0
12299 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12300 // zext(x) > y ? trunc(zext(x) - y) : 0
12301 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12302 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12303 Other.getOperand(0).getOpcode() == ISD::SUB &&
12304 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12305 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12306 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12307 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12308 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12309 DAG, DL))
12310 return R;
12311 }
12312
12313 if (Other && Other.getNumOperands() == 2) {
12314 SDValue CondRHS = RHS;
12315 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12316
12317 if (OpLHS == LHS) {
12318 // Look for a general sub with unsigned saturation first.
12319 // x >= y ? x-y : 0 --> usubsat x, y
12320 // x > y ? x-y : 0 --> usubsat x, y
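// E.g. for i8 x = 9, y = 5 this gives 4; for x = 5, y = 9 the compare fails
// and the select yields 0, matching usubsat in both cases.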
12321 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12322 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12323 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12324
12325 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12326 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12327 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12328 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12329 // If the RHS is a constant we have to reverse the const
12330 // canonicalization.
12331 // x > C-1 ? x+-C : 0 --> usubsat x, C
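// E.g. with C = 10 (i8) the canonicalized form is (x > 9) ? x + (-10) : 0,
// which is usubsat(x, 10) once the negated constant is rebuilt below.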
12332 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12333 return (!Op && !Cond) ||
12334 (Op && Cond &&
12335 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12336 };
12337 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12338 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12339 /*AllowUndefs*/ true)) {
12340 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12341 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12342 }
12343
12344 // Another special case: If C was a sign bit, the sub has been
12345 // canonicalized into a xor.
12346 // FIXME: Would it be better to use computeKnownBits to
12347 // determine whether it's safe to decanonicalize the xor?
12348 // x s< 0 ? x^C : 0 --> usubsat x, C
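// E.g. for i8 with C = 0x80: when x is negative, x^0x80 clears the sign bit
// and equals x - 128; when x is non-negative the select yields 0. Both match
// usubsat(x, 128).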
12349 APInt SplatValue;
12350 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12351 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12352 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12353 SplatValue.isSignMask()) {
12354 // Note that we have to rebuild the RHS constant here to
12355 // ensure we don't rely on particular values of undef lanes.
12356 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12357 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12358 }
12359 }
12360 }
12361 }
12362 }
12363 }
12364 }
12365
12366 if (SimplifySelectOps(N, N1, N2))
12367 return SDValue(N, 0); // Don't revisit N.
12368
12369 // Fold (vselect all_ones, N1, N2) -> N1
12370 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12371 return N1;
12372 // Fold (vselect all_zeros, N1, N2) -> N2
12373 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12374 return N2;
12375
12376 // The ConvertSelectToConcatVector function assumes both of the above
12377 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12378 // and addressed.
12379 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12380 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12381 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12382 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12383 return CV;
12384 }
12385
12386 if (SDValue V = foldVSelectOfConstants(N))
12387 return V;
12388
12389 if (hasOperation(ISD::SRA, VT))
12390 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12391 return V;
12392
12393 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12394 return SDValue(N, 0);
12395
12396 return SDValue();
12397}
12398
12399SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12400 SDValue N0 = N->getOperand(0);
12401 SDValue N1 = N->getOperand(1);
12402 SDValue N2 = N->getOperand(2);
12403 SDValue N3 = N->getOperand(3);
12404 SDValue N4 = N->getOperand(4);
12405 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12406
12407 // fold select_cc lhs, rhs, x, x, cc -> x
12408 if (N2 == N3)
12409 return N2;
12410
12411 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12412 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12413 isNullConstant(N1))
12414 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12415
12416 // Determine if the condition we're dealing with is constant
12417 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12418 CC, SDLoc(N), false)) {
12419 AddToWorklist(SCC.getNode());
12420
12421 // cond always true -> true val
12422 // cond always false -> false val
12423 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12424 return SCCC->isZero() ? N3 : N2;
12425
12426 // When the condition is UNDEF, just return the first operand. This is
12427 // coherent with DAG creation; no setcc node is created in this case.
12428 if (SCC->isUndef())
12429 return N2;
12430
12431 // Fold to a simpler select_cc
12432 if (SCC.getOpcode() == ISD::SETCC) {
12433 SDValue SelectOp = DAG.getNode(
12434 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12435 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12436 SelectOp->setFlags(SCC->getFlags());
12437 return SelectOp;
12438 }
12439 }
12440
12441 // If we can fold this based on the true/false value, do so.
12442 if (SimplifySelectOps(N, N2, N3))
12443 return SDValue(N, 0); // Don't revisit N.
12444
12445 // fold select_cc into other things, such as min/max/abs
12446 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12447}
12448
12449SDValue DAGCombiner::visitSETCC(SDNode *N) {
12450 // setcc is very commonly used as an argument to brcond. This pattern
12451 // also lends itself to numerous combines and, as a result, it is desirable
12452 // to keep the argument to a brcond as a setcc as much as possible.
12453 bool PreferSetCC =
12454 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12455
12456 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12457 EVT VT = N->getValueType(0);
12458 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12459
12460 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12461
12462 if (Combined) {
12463 // If we prefer to have a setcc, and we don't, we'll try our best to
12464 // recreate one using rebuildSetCC.
12465 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12466 SDValue NewSetCC = rebuildSetCC(Combined);
12467
12468 // We don't have anything interesting to combine to.
12469 if (NewSetCC.getNode() == N)
12470 return SDValue();
12471
12472 if (NewSetCC)
12473 return NewSetCC;
12474 }
12475 return Combined;
12476 }
12477
12478 // Optimize
12479 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12480 // or
12481 // 2) (icmp eq/ne X, (rotate X, C1))
12482 // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
12483 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
12484 // then:
12485 // If C1 is a power of 2, the rotate and shift+and versions are
12486 // equivalent, so we can interchange them depending on target preference.
12487 // Otherwise, if we have the shift+and version, we can interchange srl/shl,
12488 // which in turn affects the constant C0. We can use this to get better
12489 // constants, again determined by target preference.
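// E.g. for i64: (x & 0xffffffff) == (x >> 32) compares the low half of x
// against the high half, which is the same test as x == (rotr x, 32); since
// the shift amount 32 is a power of 2, either form can be chosen.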
12490 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12491 auto IsAndWithShift = [](SDValue A, SDValue B) {
12492 return A.getOpcode() == ISD::AND &&
12493 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12494 A.getOperand(0) == B.getOperand(0);
12495 };
12496 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12497 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12498 B.getOperand(0) == A;
12499 };
12500 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12501 bool IsRotate = false;
12502
12503 // Find either shift+and or rotate pattern.
12504 if (IsAndWithShift(N0, N1)) {
12505 AndOrOp = N0;
12506 ShiftOrRotate = N1;
12507 } else if (IsAndWithShift(N1, N0)) {
12508 AndOrOp = N1;
12509 ShiftOrRotate = N0;
12510 } else if (IsRotateWithOp(N0, N1)) {
12511 IsRotate = true;
12512 AndOrOp = N0;
12513 ShiftOrRotate = N1;
12514 } else if (IsRotateWithOp(N1, N0)) {
12515 IsRotate = true;
12516 AndOrOp = N1;
12517 ShiftOrRotate = N0;
12518 }
12519
12520 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12521 (IsRotate || AndOrOp.hasOneUse())) {
12522 EVT OpVT = N0.getValueType();
12523 // Get the constant shift/rotate amount and possibly the mask (if it's the
12524 // shift+and variant).
12525 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12526 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12527 /*AllowTrunc*/ false);
12528 if (CNode == nullptr)
12529 return std::nullopt;
12530 return CNode->getAPIntValue();
12531 };
12532 std::optional<APInt> AndCMask =
12533 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12534 std::optional<APInt> ShiftCAmt =
12535 GetAPIntValue(ShiftOrRotate.getOperand(1));
12536 unsigned NumBits = OpVT.getScalarSizeInBits();
12537
12538 // We found constants.
12539 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12540 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12541 // Check that the constants meet the constraints.
12542 bool CanTransform = IsRotate;
12543 if (!CanTransform) {
12544 // Check that the mask and shift complement each other.
12545 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12546 // Check that we are comparing all bits
12547 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12548 // Check that the and mask is correct for the shift
12549 CanTransform &=
12550 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12551 }
12552
12553 // See if target prefers another shift/rotate opcode.
12554 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12555 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12556 // Transform is valid and we have a new preference.
12557 if (CanTransform && NewShiftOpc != ShiftOpc) {
12558 SDLoc DL(N);
12559 SDValue NewShiftOrRotate =
12560 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12561 ShiftOrRotate.getOperand(1));
12562 SDValue NewAndOrOp = SDValue();
12563
12564 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12565 APInt NewMask =
12566 NewShiftOpc == ISD::SHL
12567 ? APInt::getHighBitsSet(NumBits,
12568 NumBits - ShiftCAmt->getZExtValue())
12569 : APInt::getLowBitsSet(NumBits,
12570 NumBits - ShiftCAmt->getZExtValue());
12571 NewAndOrOp =
12572 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12573 DAG.getConstant(NewMask, DL, OpVT));
12574 } else {
12575 NewAndOrOp = ShiftOrRotate.getOperand(0);
12576 }
12577
12578 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12579 }
12580 }
12581 }
12582 }
12583 return SDValue();
12584}
12585
12586SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12587 SDValue LHS = N->getOperand(0);
12588 SDValue RHS = N->getOperand(1);
12589 SDValue Carry = N->getOperand(2);
12590 SDValue Cond = N->getOperand(3);
12591
12592 // If Carry is false, fold to a regular SETCC.
12593 if (isNullConstant(Carry))
12594 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12595
12596 return SDValue();
12597}
12598
12599/// Check if N satisfies:
12600/// N is used once.
12601/// N is a Load.
12602/// The load is compatible with ExtOpcode. That means:
12603/// if the load has an explicit zero/sign extension, ExtOpcode must have the
12604/// same extension;
12605/// otherwise any extension is compatible.
12606static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12607 if (!N.hasOneUse())
12608 return false;
12609
12610 if (!isa<LoadSDNode>(N))
12611 return false;
12612
12613 LoadSDNode *Load = cast<LoadSDNode>(N);
12614 ISD::LoadExtType LoadExt = Load->getExtensionType();
12615 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12616 return true;
12617
12618 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12619 // extension.
12620 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12621 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12622 return false;
12623
12624 return true;
12625}
12626
12627/// Fold
12628/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12629/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12630/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12631/// This function is called by the DAGCombiner when visiting sext/zext/aext
12632/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12633static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12634 SelectionDAG &DAG,
12635 CombineLevel Level) {
12636 unsigned Opcode = N->getOpcode();
12637 SDValue N0 = N->getOperand(0);
12638 EVT VT = N->getValueType(0);
12639 SDLoc DL(N);
12640
12641 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12642 Opcode == ISD::ANY_EXTEND) &&
12643 "Expected EXTEND dag node in input!");
12644
12645 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12646 !N0.hasOneUse())
12647 return SDValue();
12648
12649 SDValue Op1 = N0->getOperand(1);
12650 SDValue Op2 = N0->getOperand(2);
12651 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12652 return SDValue();
12653
12654 auto ExtLoadOpcode = ISD::EXTLOAD;
12655 if (Opcode == ISD::SIGN_EXTEND)
12656 ExtLoadOpcode = ISD::SEXTLOAD;
12657 else if (Opcode == ISD::ZERO_EXTEND)
12658 ExtLoadOpcode = ISD::ZEXTLOAD;
12659
12660 // An illegal VSELECT may cause ISel to fail if it appears after legalization
12661 // (DAG Combine2), so we conservatively check the OperationAction.
12662 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12663 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12664 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12665 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12666 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12667 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12668 return SDValue();
12669
12670 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12671 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12672 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12673}
12674
12675/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12676/// a build_vector of constants.
12677/// This function is called by the DAGCombiner when visiting sext/zext/aext
12678/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12679/// Vector extends are not folded if operations are legal; this is to
12680/// avoid introducing illegal build_vector dag nodes.
12681static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
12682 const TargetLowering &TLI,
12683 SelectionDAG &DAG, bool LegalTypes) {
12684 unsigned Opcode = N->getOpcode();
12685 SDValue N0 = N->getOperand(0);
12686 EVT VT = N->getValueType(0);
12687
12688 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12689 "Expected EXTEND dag node in input!");
12690
12691 // fold (sext c1) -> c1
12692 // fold (zext c1) -> c1
12693 // fold (aext c1) -> c1
12694 if (isa<ConstantSDNode>(N0))
12695 return DAG.getNode(Opcode, DL, VT, N0);
12696
12697 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12698 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12699 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12700 if (N0->getOpcode() == ISD::SELECT) {
12701 SDValue Op1 = N0->getOperand(1);
12702 SDValue Op2 = N0->getOperand(2);
12703 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12704 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12705 // For any_extend, choose sign extension of the constants to allow a
12706 // possible further transform to sign_extend_inreg, i.e.:
12707 //
12708 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12709 // t2: i64 = any_extend t1
12710 // -->
12711 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12712 // -->
12713 // t4: i64 = sign_extend_inreg t3
12714 unsigned FoldOpc = Opcode;
12715 if (FoldOpc == ISD::ANY_EXTEND)
12716 FoldOpc = ISD::SIGN_EXTEND;
12717 return DAG.getSelect(DL, VT, N0->getOperand(0),
12718 DAG.getNode(FoldOpc, DL, VT, Op1),
12719 DAG.getNode(FoldOpc, DL, VT, Op2));
12720 }
12721 }
12722
12723 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
12724 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
12725 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
12726 EVT SVT = VT.getScalarType();
12727 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12728 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12729 return SDValue();
12730
12731 // We can fold this node into a build_vector.
12732 unsigned VTBits = SVT.getSizeInBits();
12733 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12734 SmallVector<SDValue, 8> Elts;
12735 unsigned NumElts = VT.getVectorNumElements();
12736
12737 for (unsigned i = 0; i != NumElts; ++i) {
12738 SDValue Op = N0.getOperand(i);
12739 if (Op.isUndef()) {
12740 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12741 Elts.push_back(DAG.getUNDEF(SVT));
12742 else
12743 Elts.push_back(DAG.getConstant(0, DL, SVT));
12744 continue;
12745 }
12746
12747 SDLoc DL(Op);
12748 // Get the constant value and if needed trunc it to the size of the type.
12749 // Nodes like build_vector might have constants wider than the scalar type.
12750 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12751 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12752 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12753 else
12754 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12755 }
12756
12757 return DAG.getBuildVector(VT, DL, Elts);
12758}
12759
12760// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
12761// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12762// transformation. Returns true if the extensions are possible and the
12763// above-mentioned transformation is profitable.
12764static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12765 unsigned ExtOpc,
12766 SmallVectorImpl<SDNode *> &ExtendNodes,
12767 const TargetLowering &TLI) {
12768 bool HasCopyToRegUses = false;
12769 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12770 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12771 ++UI) {
12772 SDNode *User = *UI;
12773 if (User == N)
12774 continue;
12775 if (UI.getUse().getResNo() != N0.getResNo())
12776 continue;
12777 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12778 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12779 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12780 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12781 // Sign bits will be lost after a zext.
12782 return false;
12783 bool Add = false;
12784 for (unsigned i = 0; i != 2; ++i) {
12785 SDValue UseOp = User->getOperand(i);
12786 if (UseOp == N0)
12787 continue;
12788 if (!isa<ConstantSDNode>(UseOp))
12789 return false;
12790 Add = true;
12791 }
12792 if (Add)
12793 ExtendNodes.push_back(User);
12794 continue;
12795 }
12796 // If truncates aren't free and there are users we can't
12797 // extend, it isn't worthwhile.
12798 if (!isTruncFree)
12799 return false;
12800 // Remember if this value is live-out.
12801 if (User->getOpcode() == ISD::CopyToReg)
12802 HasCopyToRegUses = true;
12803 }
12804
12805 if (HasCopyToRegUses) {
12806 bool BothLiveOut = false;
12807 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12808 UI != UE; ++UI) {
12809 SDUse &Use = UI.getUse();
12810 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12811 BothLiveOut = true;
12812 break;
12813 }
12814 }
12815 if (BothLiveOut)
12816 // Both unextended and extended values are live out. There had better be
12817 // a good reason for the transformation.
12818 return !ExtendNodes.empty();
12819 }
12820 return true;
12821}
12822
12823void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12824 SDValue OrigLoad, SDValue ExtLoad,
12825 ISD::NodeType ExtType) {
12826 // Extend SetCC uses if necessary.
12827 SDLoc DL(ExtLoad);
12828 for (SDNode *SetCC : SetCCs) {
12829 SmallVector<SDValue, 4> Ops;
12830
12831 for (unsigned j = 0; j != 2; ++j) {
12832 SDValue SOp = SetCC->getOperand(j);
12833 if (SOp == OrigLoad)
12834 Ops.push_back(ExtLoad);
12835 else
12836 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12837 }
12838
12839 Ops.push_back(SetCC->getOperand(2));
12840 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12841 }
12842}
12843
12844// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12845SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12846 SDValue N0 = N->getOperand(0);
12847 EVT DstVT = N->getValueType(0);
12848 EVT SrcVT = N0.getValueType();
12849
12850 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12851 N->getOpcode() == ISD::ZERO_EXTEND) &&
12852 "Unexpected node type (not an extend)!");
12853
12854 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12855 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12856 // (v8i32 (sext (v8i16 (load x))))
12857 // into:
12858 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12859 // (v4i32 (sextload (x + 16)))))
12860 // Where uses of the original load, i.e.:
12861 // (v8i16 (load x))
12862 // are replaced with:
12863 // (v8i16 (truncate
12864 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12865 // (v4i32 (sextload (x + 16)))))))
12866 //
12867 // This combine is only applicable to illegal, but splittable, vectors.
12868 // All legal types, and illegal non-vector types, are handled elsewhere.
12869 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12870 //
12871 if (N0->getOpcode() != ISD::LOAD)
12872 return SDValue();
12873
12874 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12875
12876 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12877 !N0.hasOneUse() || !LN0->isSimple() ||
12878 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12879 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12880 return SDValue();
12881
12882 SmallVector<SDNode *, 4> SetCCs;
12883 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12884 return SDValue();
12885
12886 ISD::LoadExtType ExtType =
12887 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12888
12889 // Try to split the vector types to get down to legal types.
12890 EVT SplitSrcVT = SrcVT;
12891 EVT SplitDstVT = DstVT;
12892 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12893 SplitSrcVT.getVectorNumElements() > 1) {
12894 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12895 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12896 }
12897
12898 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12899 return SDValue();
12900
12901 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12902
12903 SDLoc DL(N);
12904 const unsigned NumSplits =
12905 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12906 const unsigned Stride = SplitSrcVT.getStoreSize();
12907 SmallVector<SDValue, 4> Loads;
12908 SmallVector<SDValue, 4> Chains;
12909
12910 SDValue BasePtr = LN0->getBasePtr();
12911 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12912 const unsigned Offset = Idx * Stride;
12913
12914 SDValue SplitLoad =
12915 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
12916 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
12917 SplitSrcVT, LN0->getOriginalAlign(),
12918 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12919
12920 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12921
12922 Loads.push_back(SplitLoad.getValue(0));
12923 Chains.push_back(SplitLoad.getValue(1));
12924 }
12925
12926 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12927 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12928
12929 // Simplify TF.
12930 AddToWorklist(NewChain.getNode());
12931
12932 CombineTo(N, NewValue);
12933
12934 // Replace uses of the original load (before extension)
12935 // with a truncate of the concatenated sextloaded vectors.
12936 SDValue Trunc =
12937 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12938 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12939 CombineTo(N0.getNode(), Trunc, NewChain);
12940 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12941}
12942
12943// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12944// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12945SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12946 assert(N->getOpcode() == ISD::ZERO_EXTEND);
12947 EVT VT = N->getValueType(0);
12948 EVT OrigVT = N->getOperand(0).getValueType();
12949 if (TLI.isZExtFree(OrigVT, VT))
12950 return SDValue();
12951
12952 // and/or/xor
12953 SDValue N0 = N->getOperand(0);
12954 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12955 N0.getOperand(1).getOpcode() != ISD::Constant ||
12956 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12957 return SDValue();
12958
12959 // shl/shr
12960 SDValue N1 = N0->getOperand(0);
12961 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12962 N1.getOperand(1).getOpcode() != ISD::Constant ||
12963 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12964 return SDValue();
12965
12966 // load
12967 if (!isa<LoadSDNode>(N1.getOperand(0)))
12968 return SDValue();
12969 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
12970 EVT MemVT = Load->getMemoryVT();
12971 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
12972 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
12973 return SDValue();
12974
12975
12976 // If the shift op is SHL, the logic op must be AND, otherwise the result
12977 // will be wrong.
12978 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
12979 return SDValue();
12980
12981 if (!N0.hasOneUse() || !N1.hasOneUse())
12982 return SDValue();
12983
12984 SmallVector<SDNode *, 4> SetCCs;
12985 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
12986 ISD::ZERO_EXTEND, SetCCs, TLI))
12987 return SDValue();
12988
12989 // Actually do the transformation.
12990 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
12991 Load->getChain(), Load->getBasePtr(),
12992 Load->getMemoryVT(), Load->getMemOperand());
12993
12994 SDLoc DL1(N1);
12995 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
12996 N1.getOperand(1));
12997
12998 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12999 SDLoc DL0(N0);
13000 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13001 DAG.getConstant(Mask, DL0, VT));
13002
13003 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13004 CombineTo(N, And);
13005 if (SDValue(Load, 0).hasOneUse()) {
13006 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13007 } else {
13008 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13009 Load->getValueType(0), ExtLoad);
13010 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13011 }
13012
13013 // N0 is dead at this point.
13014 recursivelyDeleteUnusedNodes(N0.getNode());
13015
13016 return SDValue(N,0); // Return N so it doesn't get rechecked!
13017}
13018
13019/// If we're narrowing or widening the result of a vector select and the final
13020/// size is the same size as a setcc (compare) feeding the select, then try to
13021/// apply the cast operation to the select's operands because matching vector
13022/// sizes for a select condition and other operands should be more efficient.
13023SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13024 unsigned CastOpcode = Cast->getOpcode();
13025 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13026 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13027 CastOpcode == ISD::FP_ROUND) &&
13028 "Unexpected opcode for vector select narrowing/widening");
13029
13030 // We only do this transform before legal ops because the pattern may be
13031 // obfuscated by target-specific operations after legalization. Do not create
13032 // an illegal select op, however, because that may be difficult to lower.
13033 EVT VT = Cast->getValueType(0);
13034 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13035 return SDValue();
13036
13037 SDValue VSel = Cast->getOperand(0);
13038 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13039 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13040 return SDValue();
13041
13042 // Does the setcc have the same vector size as the casted select?
13043 SDValue SetCC = VSel.getOperand(0);
13044 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13045 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13046 return SDValue();
13047
13048 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13049 SDValue A = VSel.getOperand(1);
13050 SDValue B = VSel.getOperand(2);
13051 SDValue CastA, CastB;
13052 SDLoc DL(Cast);
13053 if (CastOpcode == ISD::FP_ROUND) {
13054 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13055 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13056 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13057 } else {
13058 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13059 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13060 }
13061 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13062}
13063
13064// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13065// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13066static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13067 const TargetLowering &TLI, EVT VT,
13068 bool LegalOperations, SDNode *N,
13069 SDValue N0, ISD::LoadExtType ExtLoadType) {
13070 SDNode *N0Node = N0.getNode();
13071 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13072 : ISD::isZEXTLoad(N0Node);
13073 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13074 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13075 return SDValue();
13076
13077 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13078 EVT MemVT = LN0->getMemoryVT();
13079 if ((LegalOperations || !LN0->isSimple() ||
13080 VT.isVector()) &&
13081 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13082 return SDValue();
13083
13084 SDValue ExtLoad =
13085 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13086 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13087 Combiner.CombineTo(N, ExtLoad);
13088 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13089 if (LN0->use_empty())
13090 Combiner.recursivelyDeleteUnusedNodes(LN0);
13091 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13092}
13093
13094// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13095// Only generate vector extloads when 1) they're legal, and 2) they are
13096// deemed desirable by the target. NonNegZExt can be set to true if a zero
13097// extend has the nonneg flag to allow use of sextload if profitable.
13098static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13099 const TargetLowering &TLI, EVT VT,
13100 bool LegalOperations, SDNode *N, SDValue N0,
13101 ISD::LoadExtType ExtLoadType,
13102 ISD::NodeType ExtOpc,
13103 bool NonNegZExt = false) {
13104 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13105 return {};
13106
13107 // If this is zext nneg, see if it would make sense to treat it as a sext.
13108 if (NonNegZExt) {
13109 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13110 "Unexpected load type or opcode");
13111 for (SDNode *User : N0->uses()) {
13112 if (User->getOpcode() == ISD::SETCC) {
13113 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13114 if (ISD::isSignedIntSetCC(CC)) {
13115 ExtLoadType = ISD::SEXTLOAD;
13116 ExtOpc = ISD::SIGN_EXTEND;
13117 break;
13118 }
13119 }
13120 }
13121 }
13122
13123 // TODO: isFixedLengthVector() should be removed, with any negative effects on
13124 // code generation handled by the target's implementation of
13125 // isVectorLoadExtDesirable().
13126 if ((LegalOperations || VT.isFixedLengthVector() ||
13127 !cast<LoadSDNode>(N0)->isSimple()) &&
13128 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13129 return {};
13130
13131 bool DoXform = true;
13132 SmallVector<SDNode *, 4> SetCCs;
13133 if (!N0.hasOneUse())
13134 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13135 if (VT.isVector())
13136 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13137 if (!DoXform)
13138 return {};
13139
13140 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13141 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13142 LN0->getBasePtr(), N0.getValueType(),
13143 LN0->getMemOperand());
13144 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13145 // If the load value is used only by N, replace it via CombineTo N.
13146 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13147 Combiner.CombineTo(N, ExtLoad);
13148 if (NoReplaceTrunc) {
13149 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13150 Combiner.recursivelyDeleteUnusedNodes(LN0);
13151 } else {
13152 SDValue Trunc =
13153 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13154 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13155 }
13156 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13157}
13158
13159static SDValue
13160 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13161 bool LegalOperations, SDNode *N, SDValue N0,
13162 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13163 if (!N0.hasOneUse())
13164 return SDValue();
13165
13166 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13167 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13168 return SDValue();
13169
13170 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13171 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13172 return SDValue();
13173
13174 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13175 return SDValue();
13176
13177 SDLoc dl(Ld);
13178 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13179 SDValue NewLoad = DAG.getMaskedLoad(
13180 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13181 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13182 ExtLoadType, Ld->isExpandingLoad());
13183 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13184 return NewLoad;
13185}
13186
13187// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13188static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13189 const TargetLowering &TLI, EVT VT,
13190 SDValue N0,
13191 ISD::LoadExtType ExtLoadType) {
13192 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13193 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13194 return {};
13195 EVT MemoryVT = ALoad->getMemoryVT();
13196 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13197 return {};
13198 // Can't fold into ALoad if it is already extending differently.
13199 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13200 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13201 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13202 return {};
13203
13204 EVT OrigVT = ALoad->getValueType(0);
13205 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13206 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13207 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13208 ALoad->getBasePtr(), ALoad->getMemOperand()));
13209 NewALoad->setExtensionType(ExtLoadType);
13210 DAG.ReplaceAllUsesOfValueWith(
13211 SDValue(ALoad, 0),
13212 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13213 // Update the chain uses.
13214 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13215 return SDValue(NewALoad, 0);
13216}
13217
13218static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13219 bool LegalOperations) {
13220 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13221 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13222
13223 SDValue SetCC = N->getOperand(0);
13224 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13225 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13226 return SDValue();
13227
13228 SDValue X = SetCC.getOperand(0);
13229 SDValue Ones = SetCC.getOperand(1);
13230 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13231 EVT VT = N->getValueType(0);
13232 EVT XVT = X.getValueType();
13233 // setge X, C is canonicalized to setgt, so we do not need to match that
13234 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13235 // not require the 'not' op.
13236 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13237 // Invert and smear/shift the sign bit:
13238 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13239 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
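// E.g. for i8 X = 5: not X = 0xFA and sra by 7 gives -1, matching
// sext(i1 true); for X = -3: not X = 0x02 and the shift gives 0.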
13240 SDLoc DL(N);
13241 unsigned ShCt = VT.getSizeInBits() - 1;
13242 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13243 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13244 SDValue NotX = DAG.getNOT(DL, X, VT);
13245 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13246 auto ShiftOpcode =
13247 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13248 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13249 }
13250 }
13251 return SDValue();
13252}
13253
13254SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13255 SDValue N0 = N->getOperand(0);
13256 if (N0.getOpcode() != ISD::SETCC)
13257 return SDValue();
13258
13259 SDValue N00 = N0.getOperand(0);
13260 SDValue N01 = N0.getOperand(1);
13261 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13262 EVT VT = N->getValueType(0);
13263 EVT N00VT = N00.getValueType();
13264 SDLoc DL(N);
13265
13266 // Propagate fast-math-flags.
13267 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13268
13269 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13270 // the same size as the compared operands. Try to optimize sext(setcc())
13271 // if this is the case.
13272 if (VT.isVector() && !LegalOperations &&
13273 TLI.getBooleanContents(N00VT) ==
13274 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13275 EVT SVT = getSetCCResultType(N00VT);
13276
13277 // If we already have the desired type, don't change it.
13278 if (SVT != N0.getValueType()) {
13279 // We know that the # elements of the results is the same as the
13280 // # elements of the compare (and the # elements of the compare result
13281 // for that matter). Check to see that they are the same size. If so,
13282 // we know that the element size of the sext'd result matches the
13283 // element size of the compare operands.
13284 if (VT.getSizeInBits() == SVT.getSizeInBits())
13285 return DAG.getSetCC(DL, VT, N00, N01, CC);
13286
13287 // If the desired elements are smaller or larger than the source
13288 // elements, we can use a matching integer vector type and then
13289 // truncate/sign extend.
13290 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13291 if (SVT == MatchingVecType) {
13292 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13293 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13294 }
13295 }
13296
13297 // Try to eliminate the sext of a setcc by zexting the compare operands.
13298 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13299 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13300 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13301 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13302 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13303
13304 // We have an unsupported narrow vector compare op that would be legal
13305 // if extended to the destination type. See if the compare operands
13306 // can be freely extended to the destination type.
13307 auto IsFreeToExtend = [&](SDValue V) {
13308 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13309 return true;
13310 // Match a simple, non-extended load that can be converted to a
13311 // legal {z/s}ext-load.
13312 // TODO: Allow widening of an existing {z/s}ext-load?
13313 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13314 ISD::isUNINDEXEDLoad(V.getNode()) &&
13315 cast<LoadSDNode>(V)->isSimple() &&
13316 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13317 return false;
13318
13319 // Non-chain users of this value must either be the setcc in this
13320 // sequence or extends that can be folded into the new {z/s}ext-load.
13321 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13322 UI != UE; ++UI) {
13323 // Skip uses of the chain and the setcc.
13324 SDNode *User = *UI;
13325 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13326 continue;
13327 // Extra users must have exactly the same cast we are about to create.
13328 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13329 // is enhanced similarly.
13330 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13331 return false;
13332 }
13333 return true;
13334 };
13335
13336 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13337 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13338 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13339 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13340 }
13341 }
13342 }
13343
13344 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13345 // Here, T can be 1 or -1, depending on the type of the setcc and
13346 // getBooleanContents().
13347 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13348
13349 // To determine the "true" side of the select, we need to know the high bit
13350 // of the value returned by the setcc if it evaluates to true.
13351 // If the type of the setcc is i1, then the true case of the select is just
13352 // sext(i1 1), that is, -1.
13353 // If the type of the setcc is larger (say, i8) then the value of the high
13354 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13355 // of the appropriate width.
13356 SDValue ExtTrueVal = (SetCCWidth == 1)
13357 ? DAG.getAllOnesConstant(DL, VT)
13358 : DAG.getBoolConstant(true, DL, VT, N00VT);
13359 SDValue Zero = DAG.getConstant(0, DL, VT);
13360 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13361 return SCC;
13362
13363 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13364 EVT SetCCVT = getSetCCResultType(N00VT);
13365 // Don't do this transform for i1 because there's a select transform
13366 // that would reverse it.
13367 // TODO: We should not do this transform at all without a target hook
13368 // because a sext is likely cheaper than a select?
13369 if (SetCCVT.getScalarSizeInBits() != 1 &&
13370 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13371 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13372 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13373 }
13374 }
13375
13376 return SDValue();
13377}
13378
13379SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13380 SDValue N0 = N->getOperand(0);
13381 EVT VT = N->getValueType(0);
13382 SDLoc DL(N);
13383
13384 if (VT.isVector())
13385 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13386 return FoldedVOp;
13387
13388 // sext(undef) = 0 because the top bit will all be the same.
13389 if (N0.isUndef())
13390 return DAG.getConstant(0, DL, VT);
13391
13392 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13393 return Res;
13394
13395 // fold (sext (sext x)) -> (sext x)
13396 // fold (sext (aext x)) -> (sext x)
13397 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13398 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13399
13400 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13401 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13402 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13403 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13404 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13405 N0.getOperand(0));
13406
13407 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13408 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13409 SDValue N00 = N0.getOperand(0);
13410 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13411 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13412 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13413 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13414 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13415 }
13416 }
13417
13418 if (N0.getOpcode() == ISD::TRUNCATE) {
13419 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13420 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13421 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13422 SDNode *oye = N0.getOperand(0).getNode();
13423 if (NarrowLoad.getNode() != N0.getNode()) {
13424 CombineTo(N0.getNode(), NarrowLoad);
13425 // CombineTo deleted the truncate, if needed, but not what's under it.
13426 AddToWorklist(oye);
13427 }
13428 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13429 }
13430
13431 // See if the value being truncated is already sign extended. If so, just
13432 // eliminate the trunc/sext pair.
13433 SDValue Op = N0.getOperand(0);
13434 unsigned OpBits = Op.getScalarValueSizeInBits();
13435 unsigned MidBits = N0.getScalarValueSizeInBits();
13436 unsigned DestBits = VT.getScalarSizeInBits();
13437 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13438
13439 if (OpBits == DestBits) {
13440 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13441 // bits, it is already ready.
13442 if (NumSignBits > DestBits-MidBits)
13443 return Op;
13444 } else if (OpBits < DestBits) {
13445 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13446 // bits, just sext from i32.
13447 if (NumSignBits > OpBits-MidBits)
13448 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13449 } else {
13450 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13451 // bits, just truncate to i32.
13452 if (NumSignBits > OpBits-MidBits)
13453 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13454 }
13455
13456 // fold (sext (truncate x)) -> (sextinreg x).
13457 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13458 N0.getValueType())) {
13459 if (OpBits < DestBits)
13460 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13461 else if (OpBits > DestBits)
13462 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13463 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13464 DAG.getValueType(N0.getValueType()));
13465 }
13466 }
13467
13468 // Try to simplify (sext (load x)).
13469 if (SDValue foldedExt =
13470 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13471 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13472 return foldedExt;
13473
13474 if (SDValue foldedExt =
13475 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13476 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13477 return foldedExt;
13478
13479 // fold (sext (load x)) to multiple smaller sextloads.
13480 // Only on illegal but splittable vectors.
13481 if (SDValue ExtLoad = CombineExtLoad(N))
13482 return ExtLoad;
13483
13484 // Try to simplify (sext (sextload x)).
13485 if (SDValue foldedExt = tryToFoldExtOfExtload(
13486 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13487 return foldedExt;
13488
13489 // Try to simplify (sext (atomic_load x)).
13490 if (SDValue foldedExt =
13491 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13492 return foldedExt;
13493
13494 // fold (sext (and/or/xor (load x), cst)) ->
13495 // (and/or/xor (sextload x), (sext cst))
13496 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13497 isa<LoadSDNode>(N0.getOperand(0)) &&
13498 N0.getOperand(1).getOpcode() == ISD::Constant &&
13499 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13500 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13501 EVT MemVT = LN00->getMemoryVT();
13502 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13503 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13504 SmallVector<SDNode *, 4> SetCCs;
13505 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13506 ISD::SIGN_EXTEND, SetCCs, TLI);
13507 if (DoXform) {
13508 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13509 LN00->getChain(), LN00->getBasePtr(),
13510 LN00->getMemoryVT(),
13511 LN00->getMemOperand());
13512 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13513 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13514 ExtLoad, DAG.getConstant(Mask, DL, VT));
13515 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13516 bool NoReplaceTruncAnd = !N0.hasOneUse();
13517 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13518 CombineTo(N, And);
13519 // If N0 has multiple uses, change other uses as well.
13520 if (NoReplaceTruncAnd) {
13521 SDValue TruncAnd =
13522 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
13523 CombineTo(N0.getNode(), TruncAnd);
13524 }
13525 if (NoReplaceTrunc) {
13526 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13527 } else {
13528 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13529 LN00->getValueType(0), ExtLoad);
13530 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13531 }
13532 return SDValue(N,0); // Return N so it doesn't get rechecked!
13533 }
13534 }
13535 }
13536
13537 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13538 return V;
13539
13540 if (SDValue V = foldSextSetcc(N))
13541 return V;
13542
13543 // fold (sext x) -> (zext x) if the sign bit is known zero.
13544 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13545 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13546 DAG.SignBitIsZero(N0)) {
13547 SDNodeFlags Flags;
13548 Flags.setNonNeg(true);
13549 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13550 }
13551
13552 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13553 return NewVSel;
13554
13555 // Eliminate this sign extend by doing a negation in the destination type:
13556 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
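// E.g. for i8 X = 200: both sides evaluate to -200; the sub is simply
// re-created in the wider type.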
13557 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13558 isNullOrNullSplat(N0.getOperand(0)) &&
13559 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
13560 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
13561 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13562 return DAG.getNegative(Zext, DL, VT);
13563 }
13564 // Eliminate this sign extend by doing a decrement in the destination type:
13565 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13566 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13567 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
13568 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13569 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
13570 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13571 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13572 }
13573
13574 // fold sext (not i1 X) -> add (zext i1 X), -1
13575 // TODO: This could be extended to handle bool vectors.
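// E.g. X = true: not X = 0 and sext gives 0, while zext X - 1 = 0;
// X = false: sext(not X) = -1 and zext X - 1 = -1.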
13576 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13577 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13578 TLI.isOperationLegal(ISD::ADD, VT)))) {
13579 // If we can eliminate the 'not', the sext form should be better
13580 if (SDValue NewXor = visitXOR(N0.getNode())) {
13581 // Returning N0 is a form of in-visit replacement that may have
13582 // invalidated N0.
13583 if (NewXor.getNode() == N0.getNode()) {
13584 // Return SDValue here as the xor should have already been replaced in
13585 // this sext.
13586 return SDValue();
13587 }
13588
13589 // Return a new sext with the new xor.
13590 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13591 }
13592
13593 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13594 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13595 }
13596
13597 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13598 return Res;
13599
13600 return SDValue();
13601}
13602
13603/// Given an extending node with a pop-count operand, if the target does not
13604/// support a pop-count in the narrow source type but does support it in the
13605/// destination type, widen the pop-count to the destination type.
13606static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13607 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13608 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13609
13610 SDValue CtPop = Extend->getOperand(0);
13611 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13612 return SDValue();
13613
13614 EVT VT = Extend->getValueType(0);
13615 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13616 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13617 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13618 return SDValue();
13619
13620 // zext (ctpop X) --> ctpop (zext X)
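// Zero extension only introduces zero bits, so the population count is
// unchanged; e.g. i8 0xF0 has ctpop 4 before and after zext to i32.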
13621 SDLoc DL(Extend);
13622 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13623 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13624}
13625
13626// If we have (zext (abs X)) where X is a type that will be promoted by type
13627// legalization, convert to (abs (sext X)). But don't extend past a legal type.
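// E.g. for i8 X = -5 promoted to i32: abs(sext X) = 5, the same value that
// zext(abs X) would produce, so the abs can be performed in the wider type.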
13628static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13629 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13630
13631 EVT VT = Extend->getValueType(0);
13632 if (VT.isVector())
13633 return SDValue();
13634
13635 SDValue Abs = Extend->getOperand(0);
13636 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13637 return SDValue();
13638
13639 EVT AbsVT = Abs.getValueType();
13640 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13641 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13642 TargetLowering::TypePromoteInteger)
13643 return SDValue();
13644
13645 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13646
13647 SDValue SExt =
13648 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13649 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13650 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13651}
13652
13653SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13654 SDValue N0 = N->getOperand(0);
13655 EVT VT = N->getValueType(0);
13656 SDLoc DL(N);
13657
13658 if (VT.isVector())
13659 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13660 return FoldedVOp;
13661
13662 // zext(undef) = 0
13663 if (N0.isUndef())
13664 return DAG.getConstant(0, DL, VT);
13665
13666 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13667 return Res;
13668
13669 // fold (zext (zext x)) -> (zext x)
13670 // fold (zext (aext x)) -> (zext x)
13671 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13672 SDNodeFlags Flags;
13673 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13674 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13675 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13676 }
13677
13678 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13679 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13680 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13681 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13682 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT,
13683 N0.getOperand(0));
13684
13685 // fold (zext (truncate x)) -> (zext x) or
13686 // (zext (truncate x)) -> (truncate x)
13687 // This is valid when the truncated bits of x are already zero.
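// E.g. if x is i64 with its top 32 bits known zero, then
// (zext (trunc x to i32) to i64) is just x.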
13688 SDValue Op;
13689 KnownBits Known;
13690 if (isTruncateOf(DAG, N0, Op, Known)) {
13691 APInt TruncatedBits =
13692 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13693 APInt(Op.getScalarValueSizeInBits(), 0) :
13694 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13695 N0.getScalarValueSizeInBits(),
13696 std::min(Op.getScalarValueSizeInBits(),
13697 VT.getScalarSizeInBits()));
13698 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13699 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13700 DAG.salvageDebugInfo(*N0.getNode());
13701
13702 return ZExtOrTrunc;
13703 }
13704 }
13705
13706 // fold (zext (truncate x)) -> (and x, mask)
13707 if (N0.getOpcode() == ISD::TRUNCATE) {
13708 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13709 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13710 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13711 SDNode *oye = N0.getOperand(0).getNode();
13712 if (NarrowLoad.getNode() != N0.getNode()) {
13713 CombineTo(N0.getNode(), NarrowLoad);
13714 // CombineTo deleted the truncate, if needed, but not what's under it.
13715 AddToWorklist(oye);
13716 }
13717 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13718 }
13719
13720 EVT SrcVT = N0.getOperand(0).getValueType();
13721 EVT MinVT = N0.getValueType();
13722
13723 if (N->getFlags().hasNonNeg()) {
13724 SDValue Op = N0.getOperand(0);
13725 unsigned OpBits = SrcVT.getScalarSizeInBits();
13726 unsigned MidBits = MinVT.getScalarSizeInBits();
13727 unsigned DestBits = VT.getScalarSizeInBits();
13728 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13729
13730 if (OpBits == DestBits) {
13731 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13732 // bits, it is already ready.
13733 if (NumSignBits > DestBits - MidBits)
13734 return Op;
13735 } else if (OpBits < DestBits) {
13736 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13737 // bits, just sext from i32.
13738 // FIXME: This can probably be ZERO_EXTEND nneg?
13739 if (NumSignBits > OpBits - MidBits)
13740 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13741 } else {
13742 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13743 // bits, just truncate to i32.
13744 if (NumSignBits > OpBits - MidBits)
13745 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13746 }
13747 }
13748
13749 // Try to mask before the extension to avoid having to generate a larger mask,
13750 // possibly over several sub-vectors.
13751 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13752 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13753 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13754 SDValue Op = N0.getOperand(0);
13755 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13756 AddToWorklist(Op.getNode());
13757 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13758 // Transfer the debug info; the new node is equivalent to N0.
13759 DAG.transferDbgValues(N0, ZExtOrTrunc);
13760 return ZExtOrTrunc;
13761 }
13762 }
13763
13764 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13765 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13766 AddToWorklist(Op.getNode());
13767 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13768 // We may safely transfer the debug info describing the truncate node over
13769 // to the equivalent and operation.
13770 DAG.transferDbgValues(N0, And);
13771 return And;
13772 }
13773 }
13774
13775 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13776 // if either of the casts is not free.
13777 if (N0.getOpcode() == ISD::AND &&
13778 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13779 N0.getOperand(1).getOpcode() == ISD::Constant &&
13780 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13781 !TLI.isZExtFree(N0.getValueType(), VT))) {
13782 SDValue X = N0.getOperand(0).getOperand(0);
13783 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13784 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13785 return DAG.getNode(ISD::AND, DL, VT,
13786 X, DAG.getConstant(Mask, DL, VT));
13787 }
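// Illustrative example of the fold above (assumed types): for
//   (zext (and (trunc i64 x to i32), 42) to i64)
// where the trunc or the zext is not free, the result is
//   (and i64 x, 42)
// with the constant zero-extended to the wider type.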
13788
13789 // Try to simplify (zext (load x)).
13790 if (SDValue foldedExt = tryToFoldExtOfLoad(
13791 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13792 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13793 return foldedExt;
13794
13795 if (SDValue foldedExt =
13796 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13797 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13798 return foldedExt;
13799
13800 // fold (zext (load x)) to multiple smaller zextloads.
13801 // Only on illegal but splittable vectors.
13802 if (SDValue ExtLoad = CombineExtLoad(N))
13803 return ExtLoad;
13804
13805 // Try to simplify (zext (atomic_load x)).
13806 if (SDValue foldedExt =
13807 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13808 return foldedExt;
13809
13810 // fold (zext (and/or/xor (load x), cst)) ->
13811 // (and/or/xor (zextload x), (zext cst))
13812 // Unless (and (load x) cst) will match as a zextload already and has
13813 // additional users, or the zext is already free.
13814 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13815 isa<LoadSDNode>(N0.getOperand(0)) &&
13816 N0.getOperand(1).getOpcode() == ISD::Constant &&
13817 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13818 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13819 EVT MemVT = LN00->getMemoryVT();
13820 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13821 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13822 bool DoXform = true;
13823 SmallVector<SDNode *, 4> SetCCs;
13824 if (!N0.hasOneUse()) {
13825 if (N0.getOpcode() == ISD::AND) {
13826 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13827 EVT LoadResultTy = AndC->getValueType(0);
13828 EVT ExtVT;
13829 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13830 DoXform = false;
13831 }
13832 }
13833 if (DoXform)
13834 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13835 ISD::ZERO_EXTEND, SetCCs, TLI);
13836 if (DoXform) {
13837 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13838 LN00->getChain(), LN00->getBasePtr(),
13839 LN00->getMemoryVT(),
13840 LN00->getMemOperand());
13841 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13842 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13843 ExtLoad, DAG.getConstant(Mask, DL, VT));
13844 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13845 bool NoReplaceTruncAnd = !N0.hasOneUse();
13846 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13847 CombineTo(N, And);
13848 // If N0 has multiple uses, change other uses as well.
13849 if (NoReplaceTruncAnd) {
13850 SDValue TruncAnd =
13851 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13852 CombineTo(N0.getNode(), TruncAnd);
13853 }
13854 if (NoReplaceTrunc) {
13855 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13856 } else {
13857 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13858 LN00->getValueType(0), ExtLoad);
13859 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13860 }
13861 return SDValue(N,0); // Return N so it doesn't get rechecked!
13862 }
13863 }
13864 }
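// Illustrative example of the logic-op fold above (assumed operands):
//   (zext (and (load i8 p), 31) to i32)
// becomes
//   (and (zextload i8 p to i32), 31)
// provided ZEXTLOAD from i8 is legal for i32 and the load is unindexed and
// not already a sextload.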
13865
13866 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13867 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13868 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13869 return ZExtLoad;
13870
13871 // Try to simplify (zext (zextload x)).
13872 if (SDValue foldedExt = tryToFoldExtOfExtload(
13873 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13874 return foldedExt;
13875
13876 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13877 return V;
13878
13879 if (N0.getOpcode() == ISD::SETCC) {
13880 // Propagate fast-math-flags.
13881 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13882
13883 // Only do this before legalize for now.
13884 if (!LegalOperations && VT.isVector() &&
13885 N0.getValueType().getVectorElementType() == MVT::i1) {
13886 EVT N00VT = N0.getOperand(0).getValueType();
13887 if (getSetCCResultType(N00VT) == N0.getValueType())
13888 return SDValue();
13889
13890 // We know that the # elements of the results is the same as the #
13891 // elements of the compare (and the # elements of the compare result for
13892 // that matter). Check to see that they are the same size. If so, we know
13893 // that the element size of the sext'd result matches the element size of
13894 // the compare operands.
13895 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13896 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13897 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13898 N0.getOperand(1), N0.getOperand(2));
13899 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13900 }
13901
13902 // If the desired elements are smaller or larger than the source
13903 // elements we can use a matching integer vector type and then
13904 // truncate/any extend followed by zext_in_reg.
13905 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13906 SDValue VsetCC =
13907 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13908 N0.getOperand(1), N0.getOperand(2));
13909 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13910 N0.getValueType());
13911 }
13912
13913 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13914 EVT N0VT = N0.getValueType();
13915 EVT N00VT = N0.getOperand(0).getValueType();
13916 if (SDValue SCC = SimplifySelectCC(
13917 DL, N0.getOperand(0), N0.getOperand(1),
13918 DAG.getBoolConstant(true, DL, N0VT, N00VT),
13919 DAG.getBoolConstant(false, DL, N0VT, N00VT),
13920 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13921 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13922 }
13923
13924 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
13925 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13926 !TLI.isZExtFree(N0, VT)) {
13927 SDValue ShVal = N0.getOperand(0);
13928 SDValue ShAmt = N0.getOperand(1);
13929 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13930 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13931 if (N0.getOpcode() == ISD::SHL) {
13932 // If the original shl may be shifting out bits, do not perform this
13933 // transformation.
13934 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13935 ShVal.getOperand(0).getValueSizeInBits();
13936 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13937 // If the shift is too large, then see if we can deduce that the
13938 // shift is safe anyway.
13939 // Create a mask that has ones for the bits being shifted out.
13940 APInt ShiftOutMask =
13941 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
13942 ShAmtC->getAPIntValue().getZExtValue());
13943
13944 // Check if the bits being shifted out are known to be zero.
13945 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
13946 return SDValue();
13947 }
13948 }
13949
13950 // Ensure that the shift amount is wide enough for the shifted value.
13951 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13952 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13953
13954 return DAG.getNode(N0.getOpcode(), DL, VT,
13955 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13956 }
13957 }
13958 }
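// Illustrative example of the shift fold above (assumed types):
//   (zext (shl (zext i8 x to i16), 3) to i32) -> (shl (zext i8 x to i32), 3)
// This is safe because the inner value has 8 known-zero high bits in i16, so
// shifting left by 3 cannot move set bits out of the narrow type.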
13959
13960 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13961 return NewVSel;
13962
13963 if (SDValue NewCtPop = widenCtPop(N, DAG))
13964 return NewCtPop;
13965
13966 if (SDValue V = widenAbs(N, DAG))
13967 return V;
13968
13969 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13970 return Res;
13971
13972 // CSE zext nneg with sext if the zext is not free.
13973 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
13974 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
13975 if (CSENode)
13976 return SDValue(CSENode, 0);
13977 }
13978
13979 return SDValue();
13980}
13981
13982SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13983 SDValue N0 = N->getOperand(0);
13984 EVT VT = N->getValueType(0);
13985 SDLoc DL(N);
13986
13987 // aext(undef) = undef
13988 if (N0.isUndef())
13989 return DAG.getUNDEF(VT);
13990
13991 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13992 return Res;
13993
13994 // fold (aext (aext x)) -> (aext x)
13995 // fold (aext (zext x)) -> (zext x)
13996 // fold (aext (sext x)) -> (sext x)
13997 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
13998 N0.getOpcode() == ISD::SIGN_EXTEND) {
13999 SDNodeFlags Flags;
14000 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14001 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14002 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14003 }
14004
14005 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14006 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14007 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14008 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14009 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14010 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14011 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14012
14013 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14014 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14015 if (N0.getOpcode() == ISD::TRUNCATE) {
14016 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14017 SDNode *oye = N0.getOperand(0).getNode();
14018 if (NarrowLoad.getNode() != N0.getNode()) {
14019 CombineTo(N0.getNode(), NarrowLoad);
14020 // CombineTo deleted the truncate, if needed, but not what's under it.
14021 AddToWorklist(oye);
14022 }
14023 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14024 }
14025 }
14026
14027 // fold (aext (truncate x))
14028 if (N0.getOpcode() == ISD::TRUNCATE)
14029 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14030
14031 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14032 // if the trunc is not free.
14033 if (N0.getOpcode() == ISD::AND &&
14034 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14035 N0.getOperand(1).getOpcode() == ISD::Constant &&
14036 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14037 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14038 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14039 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14040 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14041 }
14042
14043 // fold (aext (load x)) -> (aext (truncate (extload x)))
14044 // None of the supported targets knows how to perform load and any_ext
14045 // on vectors in one instruction, so attempt to fold to zext instead.
14046 if (VT.isVector()) {
14047 // Try to simplify (zext (load x)).
14048 if (SDValue foldedExt =
14049 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14050 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14051 return foldedExt;
14052 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14054 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14055 bool DoXform = true;
14056 SmallVector<SDNode *, 4> SetCCs;
14057 if (!N0.hasOneUse())
14058 DoXform =
14059 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14060 if (DoXform) {
14061 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14062 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14063 LN0->getBasePtr(), N0.getValueType(),
14064 LN0->getMemOperand());
14065 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14066 // If the load value is used only by N, replace it via CombineTo N.
14067 bool NoReplaceTrunc = N0.hasOneUse();
14068 CombineTo(N, ExtLoad);
14069 if (NoReplaceTrunc) {
14070 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14071 recursivelyDeleteUnusedNodes(LN0);
14072 } else {
14073 SDValue Trunc =
14074 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14075 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14076 }
14077 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14078 }
14079 }
14080
14081 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14082 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14083 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14084 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14085 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14086 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14087 ISD::LoadExtType ExtType = LN0->getExtensionType();
14088 EVT MemVT = LN0->getMemoryVT();
14089 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14090 SDValue ExtLoad =
14091 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14092 MemVT, LN0->getMemOperand());
14093 CombineTo(N, ExtLoad);
14094 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14095 recursivelyDeleteUnusedNodes(LN0);
14096 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14097 }
14098 }
14099
14100 if (N0.getOpcode() == ISD::SETCC) {
14101 // Propagate fast-math-flags.
14102 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14103
14104 // For vectors:
14105 // aext(setcc) -> vsetcc
14106 // aext(setcc) -> truncate(vsetcc)
14107 // aext(setcc) -> aext(vsetcc)
14108 // Only do this before legalize for now.
14109 if (VT.isVector() && !LegalOperations) {
14110 EVT N00VT = N0.getOperand(0).getValueType();
14111 if (getSetCCResultType(N00VT) == N0.getValueType())
14112 return SDValue();
14113
14114 // We know that the # elements of the results is the same as the
14115 // # elements of the compare (and the # elements of the compare result
14116 // for that matter). Check to see that they are the same size. If so,
14117 // we know that the element size of the sext'd result matches the
14118 // element size of the compare operands.
14119 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14120 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14121 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14122
14123 // If the desired elements are smaller or larger than the source
14124 // elements we can use a matching integer vector type and then
14125 // truncate/any extend
14126 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14127 SDValue VsetCC = DAG.getSetCC(
14128 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14129 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14130 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14131 }
14132
14133 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14134 if (SDValue SCC = SimplifySelectCC(
14135 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14136 DAG.getConstant(0, DL, VT),
14137 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14138 return SCC;
14139 }
14140
14141 if (SDValue NewCtPop = widenCtPop(N, DAG))
14142 return NewCtPop;
14143
14144 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14145 return Res;
14146
14147 return SDValue();
14148}
14149
14150SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14151 unsigned Opcode = N->getOpcode();
14152 SDValue N0 = N->getOperand(0);
14153 SDValue N1 = N->getOperand(1);
14154 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14155
14156 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14157 if (N0.getOpcode() == Opcode &&
14158 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14159 return N0;
14160
14161 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14162 N0.getOperand(0).getOpcode() == Opcode) {
14163 // We have an assert, truncate, assert sandwich. Make one stronger assert
14164 // by asserting on the smallest asserted type to the larger source type.
14165 // This eliminates the later assert:
14166 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14167 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14168 SDLoc DL(N);
14169 SDValue BigA = N0.getOperand(0);
14170 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14171 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14172 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14173 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14174 BigA.getOperand(0), MinAssertVTVal);
14175 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14176 }
14177
14178 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14179 // than X, just move the AssertZext in front of the truncate and drop the
14180 // AssertSext.
14181 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14182 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14183 Opcode == ISD::AssertZext) {
14184 SDValue BigA = N0.getOperand(0);
14185 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14186 if (AssertVT.bitsLT(BigA_AssertVT)) {
14187 SDLoc DL(N);
14188 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14189 BigA.getOperand(0), N1);
14190 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14191 }
14192 }
14193
14194 return SDValue();
14195}
14196
14197SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14198 SDLoc DL(N);
14199
14200 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14201 SDValue N0 = N->getOperand(0);
14202
14203 // Fold (assertalign (assertalign x, AL0), AL1) ->
14204 // (assertalign x, max(AL0, AL1))
14205 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14206 return DAG.getAssertAlign(DL, N0.getOperand(0),
14207 std::max(AL, AAN->getAlign()));
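// E.g. (illustrative): (assertalign (assertalign x, align 4), align 16)
// collapses to (assertalign x, align 16), keeping the stronger guarantee.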
14208
14209 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14210 // this assert down to source operands so that those arithmetic ops could be
14211 // exposed to the DAG combining.
14212 switch (N0.getOpcode()) {
14213 default:
14214 break;
14215 case ISD::ADD:
14216 case ISD::SUB: {
14217 unsigned AlignShift = Log2(AL);
14218 SDValue LHS = N0.getOperand(0);
14219 SDValue RHS = N0.getOperand(1);
14220 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14221 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14222 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14223 if (LHSAlignShift < AlignShift)
14224 LHS = DAG.getAssertAlign(DL, LHS, AL);
14225 if (RHSAlignShift < AlignShift)
14226 RHS = DAG.getAssertAlign(DL, RHS, AL);
14227 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14228 }
14229 break;
14230 }
14231 }
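// Illustrative example of the sinking above (assumed operands):
//   (assertalign (add p, 16), align 8)
// The constant 16 already has at least 3 trailing zero bits, so the assert is
// pushed onto the other operand:
//   (add (assertalign p, align 8), 16)
// which lets later combines reason about p's alignment directly.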
14232
14233 return SDValue();
14234}
14235
14236/// If the result of a load is shifted/masked/truncated to an effectively
14237/// narrower type, try to transform the load to a narrower type and/or
14238/// use an extending load.
14239SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14240 unsigned Opc = N->getOpcode();
14241
14242 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14243 SDValue N0 = N->getOperand(0);
14244 EVT VT = N->getValueType(0);
14245 EVT ExtVT = VT;
14246
14247 // This transformation isn't valid for vector loads.
14248 if (VT.isVector())
14249 return SDValue();
14250
14251 // The ShAmt variable is used to indicate that we've consumed a right
14252 // shift, i.e. we want to narrow the width of the load by skipping the
14253 // ShAmt least significant bits.
14254 unsigned ShAmt = 0;
14255 // A special case is when the least significant bits from the load are masked
14256 // away, but using an AND rather than a right shift. ShiftedOffset is used
14257 // to indicate that the narrowed load should be left-shifted by that many bits
14258 // to get the result.
14259 unsigned ShiftedOffset = 0;
14260 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14261 // extended to VT.
14262 if (Opc == ISD::SIGN_EXTEND_INREG) {
14263 ExtType = ISD::SEXTLOAD;
14264 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14265 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14266 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14267 // value, or it may be shifting a higher subword, half or byte into the
14268 // lowest bits.
14269
14270 // Only handle shift with constant shift amount, and the shiftee must be a
14271 // load.
14272 auto *LN = dyn_cast<LoadSDNode>(N0);
14273 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14274 if (!N1C || !LN)
14275 return SDValue();
14276 // If the shift amount is larger than the memory type then we're not
14277 // accessing any of the loaded bytes.
14278 ShAmt = N1C->getZExtValue();
14279 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14280 if (MemoryWidth <= ShAmt)
14281 return SDValue();
14282 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14283 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14284 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14285 // If original load is a SEXTLOAD then we can't simply replace it by a
14286 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14287 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14288 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14289 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14290 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14291 LN->getExtensionType() != ExtType)
14292 return SDValue();
14293 } else if (Opc == ISD::AND) {
14294 // An AND with a constant mask is the same as a truncate + zero-extend.
14295 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14296 if (!AndC)
14297 return SDValue();
14298
14299 const APInt &Mask = AndC->getAPIntValue();
14300 unsigned ActiveBits = 0;
14301 if (Mask.isMask()) {
14302 ActiveBits = Mask.countr_one();
14303 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14304 ShiftedOffset = ShAmt;
14305 } else {
14306 return SDValue();
14307 }
14308
14309 ExtType = ISD::ZEXTLOAD;
14310 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14311 }
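// Illustrative examples for the AND case (assumed operands):
//   (and (load i32 p), 0xFFFF) -> treated as a zero-extending i16 load.
//   (and (load i32 p), 0xFF00) -> ShAmt = 8, ActiveBits = 8, and
//                                 ShiftedOffset = 8, so the narrowed i8 load
//                                 is shifted left by 8 afterwards.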
14312
14313 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14314 // a right shift. Here we redo some of those checks, to possibly adjust the
14315 // ExtVT even further based on "a masking AND". We could also end up here for
14316 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14317 // need to be done here as well.
14318 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14319 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14320 // Bail out when the SRL has more than one use. This is done for historical
14321 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14322 // check below? And maybe it could be non-profitable to do the transform
14323 // when the SRL has multiple uses and we get here with Opc != ISD::SRL?
14324 // FIXME: Can't we just skip this check for the Opc == ISD::SRL case?
14325 if (!SRL.hasOneUse())
14326 return SDValue();
14327
14328 // Only handle shift with constant shift amount, and the shiftee must be a
14329 // load.
14330 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14331 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14332 if (!SRL1C || !LN)
14333 return SDValue();
14334
14335 // If the shift amount is larger than the input type then we're not
14336 // accessing any of the loaded bytes. If the load was a zextload/extload
14337 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14338 ShAmt = SRL1C->getZExtValue();
14339 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14340 if (ShAmt >= MemoryWidth)
14341 return SDValue();
14342
14343 // Because a SRL must be assumed to *need* to zero-extend the high bits
14344 // (as opposed to anyext the high bits), we can't combine the zextload
14345 // lowering of SRL and an sextload.
14346 if (LN->getExtensionType() == ISD::SEXTLOAD)
14347 return SDValue();
14348
14349 // Avoid reading outside the memory accessed by the original load (could
14350 // happen if we only adjust the load base pointer by ShAmt). Instead we
14351 // try to narrow the load even further. The typical scenario here is:
14352 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14353 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14354 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14355 // Don't replace sextload by zextload.
14356 if (ExtType == ISD::SEXTLOAD)
14357 return SDValue();
14358 // Narrow the load.
14359 ExtType = ISD::ZEXTLOAD;
14360 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14361 }
14362
14363 // If the SRL is only used by a masking AND, we may be able to adjust
14364 // the ExtVT to make the AND redundant.
14365 SDNode *Mask = *(SRL->use_begin());
14366 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14367 isa<ConstantSDNode>(Mask->getOperand(1))) {
14368 unsigned Offset, ActiveBits;
14369 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14370 if (ShiftMask.isMask()) {
14371 EVT MaskedVT =
14372 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14373 // If the mask is smaller, recompute the type.
14374 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14375 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14376 ExtVT = MaskedVT;
14377 } else if (ExtType == ISD::ZEXTLOAD &&
14378 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14379 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14380 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14381 // If the mask is shifted we can use a narrower load and a shl to insert
14382 // the trailing zeros.
14383 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14384 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14385 ExtVT = MaskedVT;
14386 ShAmt = Offset + ShAmt;
14387 ShiftedOffset = Offset;
14388 }
14389 }
14390 }
14391
14392 N0 = SRL.getOperand(0);
14393 }
14394
14395 // If the load is shifted left (and the result isn't shifted back right), we
14396 // can fold a truncate through the shift. The typical scenario is that N
14397 // points at a TRUNCATE here so the attempted fold is:
14398 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14399 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14400 unsigned ShLeftAmt = 0;
14401 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14402 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14403 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14404 ShLeftAmt = N01->getZExtValue();
14405 N0 = N0.getOperand(0);
14406 }
14407 }
14408
14409 // If we haven't found a load, we can't narrow it.
14410 if (!isa<LoadSDNode>(N0))
14411 return SDValue();
14412
14413 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14414 // Reducing the width of a volatile load is illegal. For atomics, we may be
14415 // able to reduce the width provided we never widen again. (see D66309)
14416 if (!LN0->isSimple() ||
14417 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14418 return SDValue();
14419
14420 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14421 unsigned LVTStoreBits =
14422 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14423 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14424 return LVTStoreBits - EVTStoreBits - ShAmt;
14425 };
14426
14427 // We need to adjust the pointer to the load by ShAmt bits in order to load
14428 // the correct bytes.
14429 unsigned PtrAdjustmentInBits =
14430 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14431
14432 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14433 SDLoc DL(LN0);
14434 // The original load itself didn't wrap, so an offset within it doesn't.
14435 SDNodeFlags Flags;
14436 Flags.setNoUnsignedWrap(true);
14437 SDValue NewPtr = DAG.getMemBasePlusOffset(
14438 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14439 AddToWorklist(NewPtr.getNode());
14440
14441 SDValue Load;
14442 if (ExtType == ISD::NON_EXTLOAD)
14443 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14444 LN0->getPointerInfo().getWithOffset(PtrOff),
14445 LN0->getOriginalAlign(),
14446 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14447 else
14448 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14449 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14450 LN0->getOriginalAlign(),
14451 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14452
14453 // Replace the old load's chain with the new load's chain.
14454 WorklistRemover DeadNodes(*this);
14455 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14456
14457 // Shift the result left, if we've swallowed a left shift.
14458 SDValue Result = Load;
14459 if (ShLeftAmt != 0) {
14460 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14461 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14462 ShImmTy = VT;
14463 // If the shift amount is as large as the result size (but, presumably,
14464 // no larger than the source) then the useful bits of the result are
14465 // zero; we can't simply return the shortened shift, because the result
14466 // of that operation is undefined.
14467 if (ShLeftAmt >= VT.getScalarSizeInBits())
14468 Result = DAG.getConstant(0, DL, VT);
14469 else
14470 Result = DAG.getNode(ISD::SHL, DL, VT,
14471 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14472 }
14473
14474 if (ShiftedOffset != 0) {
14475 // We're using a shifted mask, so the load now has an offset. This means
14476 // the data has been loaded into lower bytes of the register than it would
14477 // have been before, so we need to shl the loaded data into the correct
14478 // position in the register.
14479 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14480 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14481 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14482 }
14483
14484 // Return the new loaded value.
14485 return Result;
14486}
14487
14488SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14489 SDValue N0 = N->getOperand(0);
14490 SDValue N1 = N->getOperand(1);
14491 EVT VT = N->getValueType(0);
14492 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14493 unsigned VTBits = VT.getScalarSizeInBits();
14494 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14495
14496 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14497 if (N0.isUndef())
14498 return DAG.getConstant(0, SDLoc(N), VT);
14499
14500 // fold (sext_in_reg c1) -> c1
14501 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14502 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14503
14504 // If the input is already sign extended, just drop the extension.
14505 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14506 return N0;
14507
14508 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14509 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14510 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14511 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14512 N1);
14513
14514 // fold (sext_in_reg (sext x)) -> (sext x)
14515 // fold (sext_in_reg (aext x)) -> (sext x)
14516 // if x is small enough or if we know that x has more than 1 sign bit and the
14517 // sign_extend_inreg is extending from one of them.
14518 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14519 SDValue N00 = N0.getOperand(0);
14520 unsigned N00Bits = N00.getScalarValueSizeInBits();
14521 if ((N00Bits <= ExtVTBits ||
14522 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14523 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14524 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14525 }
14526
14527 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14528 // if x is small enough or if we know that x has more than 1 sign bit and the
14529 // sign_extend_inreg is extending from one of them.
14530 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14531 SDValue N00 = N0.getOperand(0);
14532 unsigned N00Bits = N00.getScalarValueSizeInBits();
14533 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14534 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14535 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14536 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14537 if ((N00Bits == ExtVTBits ||
14538 (!IsZext && (N00Bits < ExtVTBits ||
14539 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14540 (!LegalOperations ||
14541 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14542 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14543 }
14544
14545 // fold (sext_in_reg (zext x)) -> (sext x)
14546 // iff we are extending the source sign bit.
14547 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14548 SDValue N00 = N0.getOperand(0);
14549 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14550 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14551 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14552 }
14553
14554 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14555 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14556 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
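// E.g. (illustrative): (sext_in_reg x, i8) where bit 7 of x is known zero is
// the same as zero-extending in place, i.e. masking x with 0xFF.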
14557
14558 // fold operands of sext_in_reg based on knowledge that the top bits are not
14559 // demanded.
14560 if (SimplifyDemandedBits(SDValue(N, 0)))
14561 return SDValue(N, 0);
14562
14563 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14564 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14565 if (SDValue NarrowLoad = reduceLoadWidth(N))
14566 return NarrowLoad;
14567
14568 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14569 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14570 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14571 if (N0.getOpcode() == ISD::SRL) {
14572 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14573 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14574 // We can turn this into an SRA iff the input to the SRL is already sign
14575 // extended enough.
14576 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14577 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14578 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14579 N0.getOperand(1));
14580 }
14581 }
14582
14583 // fold (sext_inreg (extload x)) -> (sextload x)
14584 // If sextload is not supported by target, we can only do the combine when
14585 // load has one use. Doing otherwise can block folding the extload with other
14586 // extends that the target does support.
14587 if (ISD::isEXTLoad(N0.getNode()) &&
14588 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14589 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14590 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14591 N0.hasOneUse()) ||
14592 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14593 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14594 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14595 LN0->getChain(),
14596 LN0->getBasePtr(), ExtVT,
14597 LN0->getMemOperand());
14598 CombineTo(N, ExtLoad);
14599 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14600 AddToWorklist(ExtLoad.getNode());
14601 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14602 }
14603
14604 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14605 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14606 N0.hasOneUse() &&
14607 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14608 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14609 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14610 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14611 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14612 LN0->getChain(),
14613 LN0->getBasePtr(), ExtVT,
14614 LN0->getMemOperand());
14615 CombineTo(N, ExtLoad);
14616 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14617 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14618 }
14619
14620 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14621 // ignore it if the masked load is already sign extended
14622 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14623 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14624 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14625 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14626 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14627 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14628 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14629 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14630 CombineTo(N, ExtMaskedLoad);
14631 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14632 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14633 }
14634 }
14635
14636 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14637 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14638 if (SDValue(GN0, 0).hasOneUse() &&
14639 ExtVT == GN0->getMemoryVT() &&
14640 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
14641 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14642 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14643
14644 SDValue ExtLoad = DAG.getMaskedGather(
14645 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14646 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14647
14648 CombineTo(N, ExtLoad);
14649 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14650 AddToWorklist(ExtLoad.getNode());
14651 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14652 }
14653 }
14654
14655 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14656 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14657 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14658 N0.getOperand(1), false))
14659 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14660 }
14661
14662 // Fold (iM_signext_inreg
14663 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14664 // from iN)
14665 // -> (extract_subvector (signext iN_v to iM))
14666 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14667 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14668 SDValue InnerExt = N0.getOperand(0);
14669 EVT InnerExtVT = InnerExt->getValueType(0);
14670 SDValue Extendee = InnerExt->getOperand(0);
14671
14672 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14673 (!LegalOperations ||
14674 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14675 SDValue SignExtExtendee =
14676 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14677 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14678 N0.getOperand(1));
14679 }
14680 }
14681
14682 return SDValue();
14683}
14684
14685 static SDValue foldExtendVectorInregToExtendOfSubvector(
14686 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14687 bool LegalOperations) {
14688 unsigned InregOpcode = N->getOpcode();
14689 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14690
14691 SDValue Src = N->getOperand(0);
14692 EVT VT = N->getValueType(0);
14693 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14694 Src.getValueType().getVectorElementType(),
14695 VT.getVectorElementCount());
14696
14697 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14698 "Expected EXTEND_VECTOR_INREG dag node in input!");
14699
14700 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14701 // FIXME: one-use check may be overly restrictive
14702 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14703 return SDValue();
14704
14705 // Profitability check: we must be extending exactly one of its operands.
14706 // FIXME: this is probably overly restrictive.
14707 Src = Src.getOperand(0);
14708 if (Src.getValueType() != SrcVT)
14709 return SDValue();
14710
14711 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14712 return SDValue();
14713
14714 return DAG.getNode(Opcode, DL, VT, Src);
14715}
14716
14717SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14718 SDValue N0 = N->getOperand(0);
14719 EVT VT = N->getValueType(0);
14720 SDLoc DL(N);
14721
14722 if (N0.isUndef()) {
14723 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14724 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14725 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14726 ? DAG.getUNDEF(VT)
14727 : DAG.getConstant(0, DL, VT);
14728 }
14729
14730 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14731 return Res;
14732
14733 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14734 return SDValue(N, 0);
14735
14736 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14737 LegalOperations))
14738 return R;
14739
14740 return SDValue();
14741}
14742
14743SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14744 SDValue N0 = N->getOperand(0);
14745 EVT VT = N->getValueType(0);
14746 EVT SrcVT = N0.getValueType();
14747 bool isLE = DAG.getDataLayout().isLittleEndian();
14748 SDLoc DL(N);
14749
14750 // trunc(undef) = undef
14751 if (N0.isUndef())
14752 return DAG.getUNDEF(VT);
14753
14754 // fold (truncate (truncate x)) -> (truncate x)
14755 if (N0.getOpcode() == ISD::TRUNCATE)
14756 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14757
14758 // fold (truncate c1) -> c1
14759 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14760 return C;
14761
14762 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14763 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14764 N0.getOpcode() == ISD::SIGN_EXTEND ||
14765 N0.getOpcode() == ISD::ANY_EXTEND) {
14766 // if the source is smaller than the dest, we still need an extend.
14767 if (N0.getOperand(0).getValueType().bitsLT(VT))
14768 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14769 // if the source is larger than the dest, then we just need the truncate.
14770 if (N0.getOperand(0).getValueType().bitsGT(VT))
14771 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14772 // if the source and dest are the same type, we can drop both the extend
14773 // and the truncate.
14774 return N0.getOperand(0);
14775 }
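// Illustrative examples for the three branches above (assumed types):
//   (trunc (zext i8 x to i64) to i32)  -> (zext i8 x to i32)   (still widening)
//   (trunc (zext i48 x to i64) to i32) -> (trunc i48 x to i32) (still narrowing)
//   (trunc (zext i32 x to i64) to i32) -> x                    (round trip)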
14776
14777 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14778 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14779 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14780 N0.hasOneUse()) {
14781 SDValue X = N0.getOperand(0);
14782 SDValue ExtVal = N0.getOperand(1);
14783 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14784 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14785 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14786 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14787 }
14788 }
14789
14790 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14791 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14792 return SDValue();
14793
14794 // Fold extract-and-trunc into a narrow extract. For example:
14795 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14796 // i32 y = TRUNCATE(i64 x)
14797 // -- becomes --
14798 // v16i8 b = BITCAST (v2i64 val)
14799 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14800 //
14801 // Note: We only run this optimization after type legalization (which often
14802 // creates this pattern) and before operation legalization after which
14803 // we need to be more careful about the vector instructions that we generate.
14804 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14805 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14806 EVT VecTy = N0.getOperand(0).getValueType();
14807 EVT ExTy = N0.getValueType();
14808 EVT TrTy = N->getValueType(0);
14809
14810 auto EltCnt = VecTy.getVectorElementCount();
14811 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14812 auto NewEltCnt = EltCnt * SizeRatio;
14813
14814 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14815 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14816
14817 SDValue EltNo = N0->getOperand(1);
14818 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14819 int Elt = EltNo->getAsZExtVal();
14820 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14821 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14822 DAG.getBitcast(NVT, N0.getOperand(0)),
14823 DAG.getVectorIdxConstant(Index, DL));
14824 }
14825 }
14826
14827 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14828 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14829 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14830 TLI.isTruncateFree(SrcVT, VT)) {
14831 SDLoc SL(N0);
14832 SDValue Cond = N0.getOperand(0);
14833 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14834 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14835 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
14836 }
14837 }
14838
14839 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
14840 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14841 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14842 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14843 SDValue Amt = N0.getOperand(1);
14844 KnownBits Known = DAG.computeKnownBits(Amt);
14845 unsigned Size = VT.getScalarSizeInBits();
14846 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14847 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14848 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14849 if (AmtVT != Amt.getValueType()) {
14850 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
14851 AddToWorklist(Amt.getNode());
14852 }
14853 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
14854 }
14855 }
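// Illustrative example of the shl fold above (assumed types): for
//   (trunc (shl i64 x, amt) to i32)
// when the shift amount is known to be at most 31, this becomes
//   (shl (trunc i64 x to i32), amt)
// so the shift is performed in the narrower type.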
14856
14857 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
14858 return V;
14859
14860 if (SDValue ABD = foldABSToABD(N, DL))
14861 return ABD;
14862
14863 // Attempt to pre-truncate BUILD_VECTOR sources.
14864 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14865 N0.hasOneUse() &&
14866 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14867 // Avoid creating illegal types if running after type legalizer.
14868 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14869 EVT SVT = VT.getScalarType();
14870 SmallVector<SDValue, 8> TruncOps;
14871 for (const SDValue &Op : N0->op_values()) {
14872 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14873 TruncOps.push_back(TruncOp);
14874 }
14875 return DAG.getBuildVector(VT, DL, TruncOps);
14876 }
14877
14878 // trunc (splat_vector x) -> splat_vector (trunc x)
14879 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14880 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14881 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14882 EVT SVT = VT.getScalarType();
14883 return DAG.getSplatVector(
14884 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14885 }
14886
14887 // Fold a series of buildvector, bitcast, and truncate if possible.
14888 // For example fold
14889 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14890 // (2xi32 (buildvector x, y)).
14891 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14892 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14893 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14894 N0.getOperand(0).hasOneUse()) {
14895 SDValue BuildVect = N0.getOperand(0);
14896 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14897 EVT TruncVecEltTy = VT.getVectorElementType();
14898
14899 // Check that the element types match.
14900 if (BuildVectEltTy == TruncVecEltTy) {
14901 // Now we only need to compute the offset of the truncated elements.
14902 unsigned BuildVecNumElts = BuildVect.getNumOperands();
14903 unsigned TruncVecNumElts = VT.getVectorNumElements();
14904 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14905
14906 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14907 "Invalid number of elements");
14908
14909 SmallVector<SDValue, 8> Opnds;
14910 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14911 Opnds.push_back(BuildVect.getOperand(i));
14912
14913 return DAG.getBuildVector(VT, DL, Opnds);
14914 }
14915 }
14916
14917 // fold (truncate (load x)) -> (smaller load x)
14918 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14919 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14920 if (SDValue Reduced = reduceLoadWidth(N))
14921 return Reduced;
14922
14923 // Handle the case where the truncated result is at least as wide as the
14924 // loaded type.
14925 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14926 auto *LN0 = cast<LoadSDNode>(N0);
14927 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14928 SDValue NewLoad = DAG.getExtLoad(
14929 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14930 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14931 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14932 return NewLoad;
14933 }
14934 }
14935 }
14936
14937 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
14938 // where ... are all 'undef'.
14939 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14940 SmallVector<EVT, 8> VTs;
14941 SDValue V;
14942 unsigned Idx = 0;
14943 unsigned NumDefs = 0;
14944
14945 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14946 SDValue X = N0.getOperand(i);
14947 if (!X.isUndef()) {
14948 V = X;
14949 Idx = i;
14950 NumDefs++;
14951 }
14952 // Stop if more than one member is non-undef.
14953 if (NumDefs > 1)
14954 break;
14955
14956 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14957 X.getValueType().getVectorElementType(),
14958 X.getValueType().getVectorElementCount()));
14959 }
14960
14961 if (NumDefs == 0)
14962 return DAG.getUNDEF(VT);
14963
14964 if (NumDefs == 1) {
14965 assert(V.getNode() && "The single defined operand is empty!");
14966 SmallVector<SDValue, 8> Opnds;
14967 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14968 if (i != Idx) {
14969 Opnds.push_back(DAG.getUNDEF(VTs[i]));
14970 continue;
14971 }
14972 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14973 AddToWorklist(NV.getNode());
14974 Opnds.push_back(NV);
14975 }
14976 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
14977 }
14978 }
14979
14980 // Fold truncate of a bitcast of a vector to an extract of the low vector
14981 // element.
14982 //
14983 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14984 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14985 SDValue VecSrc = N0.getOperand(0);
14986 EVT VecSrcVT = VecSrc.getValueType();
14987 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14988 (!LegalOperations ||
14989 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14990 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14991 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
14992 DAG.getVectorIdxConstant(Idx, DL));
14993 }
14994 }
14995
14996 // Simplify the operands using demanded-bits information.
14997 if (SimplifyDemandedBits(SDValue(N, 0)))
14998 return SDValue(N, 0);
14999
15000 // fold (truncate (extract_subvector(ext x))) ->
15001 // (extract_subvector x)
15002 // TODO: This can be generalized to cover cases where the truncate and extract
15003 // do not fully cancel each other out.
15004 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15005 SDValue N00 = N0.getOperand(0);
15006 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15007 N00.getOpcode() == ISD::ZERO_EXTEND ||
15008 N00.getOpcode() == ISD::ANY_EXTEND) {
15009 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15010 VT.getVectorElementType())
15011 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15012 N00.getOperand(0), N0.getOperand(1));
15013 }
15014 }
15015
15016 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15017 return NewVSel;
15018
15019 // Narrow a suitable binary operation with a non-opaque constant operand by
15020 // moving it ahead of the truncate. This is limited to pre-legalization
15021 // because targets may prefer a wider type during later combines and invert
15022 // this transform.
15023 switch (N0.getOpcode()) {
15024 case ISD::ADD:
15025 case ISD::SUB:
15026 case ISD::MUL:
15027 case ISD::AND:
15028 case ISD::OR:
15029 case ISD::XOR:
15030 if (!LegalOperations && N0.hasOneUse() &&
15031 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15032 isConstantOrConstantVector(N0.getOperand(1), true))) {
15033 // TODO: We already restricted this to pre-legalization, but for vectors
15034 // we are extra cautious to not create an unsupported operation.
15035 // Target-specific changes are likely needed to avoid regressions here.
15036 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15037 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15038 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15039 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15040 }
15041 }
15042 break;
15043 case ISD::ADDE:
15044 case ISD::UADDO_CARRY:
15045 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15046 // (trunc uaddo_carry(X, Y, Carry)) ->
15047 // (uaddo_carry trunc(X), trunc(Y), Carry)
15048 // When the adde's carry is not used.
15049 // We only do this for uaddo_carry before operation legalization.
15050 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15051 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15052 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15053 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15054 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15055 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15056 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15057 }
15058 break;
15059 case ISD::USUBSAT:
15060 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15061 // enough to know that the upper bits are zero, we must ensure that we don't
15062 // introduce an extra truncate.
15063 if (!LegalOperations && N0.hasOneUse() &&
15064 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15065 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15066 VT.getScalarSizeInBits() &&
15067 hasOperation(N0.getOpcode(), VT)) {
15068 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15069 DAG, DL);
15070 }
15071 break;
15072 }
15073
15074 return SDValue();
15075}
15076
15077static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15078 SDValue Elt = N->getOperand(i);
15079 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15080 return Elt.getNode();
15081 return Elt.getOperand(Elt.getResNo()).getNode();
15082}
15083
15084/// build_pair (load, load) -> load
15085/// if load locations are consecutive.
15086SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15087 assert(N->getOpcode() == ISD::BUILD_PAIR);
15088
15089 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15090 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15091
15092 // A BUILD_PAIR always has the least significant part in elt 0 and the
15093 // most significant part in elt 1. So when combining into one large load, we
15094 // need to consider the endianness.
15095 if (DAG.getDataLayout().isBigEndian())
15096 std::swap(LD1, LD2);
15097
15098 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15099 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15100 LD1->getAddressSpace() != LD2->getAddressSpace())
15101 return SDValue();
15102
15103 unsigned LD1Fast = 0;
15104 EVT LD1VT = LD1->getValueType(0);
15105 unsigned LD1Bytes = LD1VT.getStoreSize();
15106 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15107 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15108 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15109 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15110 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15111 LD1->getPointerInfo(), LD1->getAlign());
15112
15113 return SDValue();
15114}
15115
15116static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15117 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15118 // and Lo parts; on big-endian machines it doesn't.
15119 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15120}
15121
15122SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15123 const TargetLowering &TLI) {
15124 // If this is not a bitcast to an FP type or if the target doesn't have
15125 // IEEE754-compliant FP logic, we're done.
15126 EVT VT = N->getValueType(0);
15127 SDValue N0 = N->getOperand(0);
15128 EVT SourceVT = N0.getValueType();
15129
15130 if (!VT.isFloatingPoint())
15131 return SDValue();
15132
15133 // TODO: Handle cases where the integer constant is a different scalar
15134 // bitwidth to the FP.
15135 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15136 return SDValue();
15137
15138 unsigned FPOpcode;
15139 APInt SignMask;
15140 switch (N0.getOpcode()) {
15141 case ISD::AND:
15142 FPOpcode = ISD::FABS;
15143 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15144 break;
15145 case ISD::XOR:
15146 FPOpcode = ISD::FNEG;
15147 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15148 break;
15149 case ISD::OR:
15150 FPOpcode = ISD::FABS;
15151 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15152 break;
15153 default:
15154 return SDValue();
15155 }
15156
15157 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15158 return SDValue();
15159
15160 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15161 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15162 // removing this would require more changes.
15163 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15164 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15165 return true;
15166
15167 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15168 };
15169
15170 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15171 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15172 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15173 // fneg (fabs X)
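// A concrete instance of the folds above, for f32 (sign mask 0x80000000):
//   (f32 bitcast (and (i32 bitcast X), 0x7fffffff)) --> (fabs X)
//   (f32 bitcast (xor (i32 bitcast X), 0x80000000)) --> (fneg X)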
15174 SDValue LogicOp0 = N0.getOperand(0);
15175 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15176 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15177 IsBitCastOrFree(LogicOp0, VT)) {
15178 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15179 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15180 NumFPLogicOpsConv++;
15181 if (N0.getOpcode() == ISD::OR)
15182 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15183 return FPOp;
15184 }
15185
15186 return SDValue();
15187}
15188
15189SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15190 SDValue N0 = N->getOperand(0);
15191 EVT VT = N->getValueType(0);
15192
15193 if (N0.isUndef())
15194 return DAG.getUNDEF(VT);
15195
15196 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15197 // Only do this before legalize types, unless both types are integer and the
15198 // scalar type is legal. Only do this before legalize ops, since the target
15199 // may be depending on the bitcast.
15200 // First check to see if this is all constant.
15201 // TODO: Support FP bitcasts after legalize types.
15202 if (VT.isVector() &&
15203 (!LegalTypes ||
15204 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15205 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15206 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15207 cast<BuildVectorSDNode>(N0)->isConstant())
15208 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15209 VT.getVectorElementType());
15210
15211 // If the input is a constant, let getNode fold it.
15212 if (isIntOrFPConstant(N0)) {
15213 // If we can't allow illegal operations, we need to check that this is just
15214 // an fp -> int or int -> fp conversion and that the resulting operation will
15215 // be legal.
15216 if (!LegalOperations ||
15217 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15218 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15219 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15220 TLI.isOperationLegal(ISD::Constant, VT))) {
15221 SDValue C = DAG.getBitcast(VT, N0);
15222 if (C.getNode() != N)
15223 return C;
15224 }
15225 }
15226
15227 // (conv (conv x, t1), t2) -> (conv x, t2)
15228 if (N0.getOpcode() == ISD::BITCAST)
15229 return DAG.getBitcast(VT, N0.getOperand(0));
15230
15231 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15232 // iff the current bitwise logicop type isn't legal
15233 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15234 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15235 auto IsFreeBitcast = [VT](SDValue V) {
15236 return (V.getOpcode() == ISD::BITCAST &&
15237 V.getOperand(0).getValueType() == VT) ||
15238 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15239 V->hasOneUse());
15240 };
15241 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15242 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15243 DAG.getBitcast(VT, N0.getOperand(0)),
15244 DAG.getBitcast(VT, N0.getOperand(1)));
15245 }
15246
15247 // fold (conv (load x)) -> (load (conv*)x)
15248 // If the resultant load doesn't need a higher alignment than the original!
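// e.g. (f32 bitcast (i32 load [p])) --> (f32 load [p]), reusing the original
// chain and memory operand, when the target reports the cast-away load as
// beneficial.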
15249 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15250 // Do not remove the cast if the types differ in endian layout.
15251 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15252 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15253 // If the load is volatile, we only want to change the load type if the
15254 // resulting load is legal. Otherwise we might increase the number of
15255 // memory accesses. We don't care if the original type was legal or not
15256 // as we assume software couldn't rely on the number of accesses of an
15257 // illegal type.
15258 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15259 TLI.isOperationLegal(ISD::LOAD, VT))) {
15260 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15261
15262 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15263 *LN0->getMemOperand())) {
15264 SDValue Load =
15265 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15266 LN0->getMemOperand());
15267 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15268 return Load;
15269 }
15270 }
15271
15272 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15273 return V;
15274
15275 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15276 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15277 //
15278 // For ppc_fp128:
15279 // fold (bitcast (fneg x)) ->
15280 // flipbit = signbit
15281 // (xor (bitcast x) (build_pair flipbit, flipbit))
15282 //
15283 // fold (bitcast (fabs x)) ->
15284 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15285 // (xor (bitcast x) (build_pair flipbit, flipbit))
15286 // This often reduces constant pool loads.
15287 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15288 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15289 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15290 !N0.getValueType().isVector()) {
15291 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15292 AddToWorklist(NewConv.getNode());
15293
15294 SDLoc DL(N);
15295 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15296 assert(VT.getSizeInBits() == 128);
15297 SDValue SignBit = DAG.getConstant(
15298 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15299 SDValue FlipBit;
15300 if (N0.getOpcode() == ISD::FNEG) {
15301 FlipBit = SignBit;
15302 AddToWorklist(FlipBit.getNode());
15303 } else {
15304 assert(N0.getOpcode() == ISD::FABS);
15305 SDValue Hi =
15306 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15307 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15308 SDLoc(NewConv)));
15309 AddToWorklist(Hi.getNode());
15310 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15311 AddToWorklist(FlipBit.getNode());
15312 }
15313 SDValue FlipBits =
15314 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15315 AddToWorklist(FlipBits.getNode());
15316 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15317 }
15318 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15319 if (N0.getOpcode() == ISD::FNEG)
15320 return DAG.getNode(ISD::XOR, DL, VT,
15321 NewConv, DAG.getConstant(SignBit, DL, VT));
15322 assert(N0.getOpcode() == ISD::FABS);
15323 return DAG.getNode(ISD::AND, DL, VT,
15324 NewConv, DAG.getConstant(~SignBit, DL, VT));
15325 }
15326
15327 // fold (bitconvert (fcopysign cst, x)) ->
15328 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15329 // Note that we don't handle (copysign x, cst) because this can always be
15330 // folded to an fneg or fabs.
15331 //
15332 // For ppc_fp128:
15333 // fold (bitcast (fcopysign cst, x)) ->
15334 // flipbit = (and (extract_element
15335 // (xor (bitcast cst), (bitcast x)), 0),
15336 // signbit)
15337 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15338 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15339 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15340 !VT.isVector()) {
15341 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15342 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15343 if (isTypeLegal(IntXVT)) {
15344 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15345 AddToWorklist(X.getNode());
15346
15347 // If X has a different width than the result/lhs, sext it or truncate it.
15348 unsigned VTWidth = VT.getSizeInBits();
15349 if (OrigXWidth < VTWidth) {
15350 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15351 AddToWorklist(X.getNode());
15352 } else if (OrigXWidth > VTWidth) {
15353 // To get the sign bit in the right place, we have to shift it right
15354 // before truncating.
15355 SDLoc DL(X);
15356 X = DAG.getNode(ISD::SRL, DL,
15357 X.getValueType(), X,
15358 DAG.getConstant(OrigXWidth-VTWidth, DL,
15359 X.getValueType()));
15360 AddToWorklist(X.getNode());
15361 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15362 AddToWorklist(X.getNode());
15363 }
15364
15365 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15366 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15367 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15368 AddToWorklist(Cst.getNode());
15369 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15370 AddToWorklist(X.getNode());
15371 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15372 AddToWorklist(XorResult.getNode());
15373 SDValue XorResult64 = DAG.getNode(
15374 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15375 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15376 SDLoc(XorResult)));
15377 AddToWorklist(XorResult64.getNode());
15378 SDValue FlipBit =
15379 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15380 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15381 AddToWorklist(FlipBit.getNode());
15382 SDValue FlipBits =
15383 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15384 AddToWorklist(FlipBits.getNode());
15385 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15386 }
15387 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15388 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15389 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15390 AddToWorklist(X.getNode());
15391
15392 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15393 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15394 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15395 AddToWorklist(Cst.getNode());
15396
15397 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15398 }
15399 }
15400
15401 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15402 if (N0.getOpcode() == ISD::BUILD_PAIR)
15403 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15404 return CombineLD;
15405
15406 // Remove double bitcasts from shuffles - this is often a legacy of
15407 // XformToShuffleWithZero being used to combine bitmaskings (of
15408 // float vectors bitcast to integer vectors) into shuffles.
15409 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
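// For example, if the shuffle operates on v2i64 and VT is v4i32 (MaskScale =
// 2), a v2i64 mask <1,0> becomes the v4i32 mask <2,3,0,1> applied directly to
// the original (un-bitcast) sources.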
15410 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15411 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15412 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15413 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15414 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15415
15416 // If operands are a bitcast, peek through if it casts the original VT.
15417 // If operands are a constant, just bitcast back to original VT.
15418 auto PeekThroughBitcast = [&](SDValue Op) {
15419 if (Op.getOpcode() == ISD::BITCAST &&
15420 Op.getOperand(0).getValueType() == VT)
15421 return SDValue(Op.getOperand(0));
15422 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15423 return DAG.getBitcast(VT, Op);
15424 return SDValue();
15425 };
15426
15427 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15428 // the result type of this bitcast. This would eliminate at least one
15429 // bitcast. See the transform in InstCombine.
15430 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15431 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15432 if (!(SV0 && SV1))
15433 return SDValue();
15434
15435 int MaskScale =
15436 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15437 SmallVector<int, 8> NewMask;
15438 for (int M : SVN->getMask())
15439 for (int i = 0; i != MaskScale; ++i)
15440 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15441
15442 SDValue LegalShuffle =
15443 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15444 if (LegalShuffle)
15445 return LegalShuffle;
15446 }
15447
15448 return SDValue();
15449}
15450
15451SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15452 EVT VT = N->getValueType(0);
15453 return CombineConsecutiveLoads(N, VT);
15454}
15455
15456SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15457 SDValue N0 = N->getOperand(0);
15458
15459 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15460 return N0;
15461
15462 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15463 // Try to push freeze through instructions that propagate but don't produce
15464 // poison as far as possible. If the operand of the freeze meets three
15465 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
15466 // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
15467 // or similar), then push the freeze through to any maybe-poison operands.
15468 // NOTE: we will strip poison-generating flags, so ignore them here.
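// e.g. (freeze (add x, 1)) becomes (add (freeze x), 1): the constant operand
// is already non-poison, and ADD itself cannot create poison once its
// poison-generating flags are dropped, so only x needs to be frozen.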
15469 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15470 /*ConsiderFlags*/ false) ||
15471 N0->getNumValues() != 1 || !N0->hasOneUse())
15472 return SDValue();
15473
15474 bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15475 N0.getOpcode() == ISD::BUILD_PAIR ||
15477
15478 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15479 // ones" or "constant" into something that depends on FrozenUndef. We can
15480 // instead pick undef values to keep those properties, while at the same time
15481 // folding away the freeze.
15482 // If we implement a more general solution for folding away freeze(undef) in
15483 // the future, then this special handling can be removed.
15484 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15485 SDLoc DL(N0);
15486 MVT VT = N0.getSimpleValueType();
15487 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15488 return DAG.getAllOnesConstant(DL, VT);
15489 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15490 SmallVector<SDValue, 8> NewVecC;
15491 for (const SDValue &Op : N0->op_values())
15492 NewVecC.push_back(
15493 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15494 return DAG.getBuildVector(VT, DL, NewVecC);
15495 }
15496 }
15497
15498 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15499 for (SDValue Op : N0->ops()) {
15500 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15501 /*Depth*/ 1))
15502 continue;
15503 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15504 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15505 if (!HadMaybePoisonOperands)
15506 continue;
15507 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15508 // Multiple maybe-poison ops when not allowed - bail out.
15509 return SDValue();
15510 }
15511 }
15512 // NOTE: the whole op may still not be guaranteed to be free of undef or
15513 // poison, because it could create undef or poison due to its
15514 // poison-generating flags. So not finding any maybe-poison operands is fine.
15514 // So not finding any maybe-poison operands is fine.
15515
15516 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15517 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15518 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15519 continue;
15520 // First, freeze each offending operand.
15521 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15522 // Then, change all other uses of unfrozen operand to use frozen operand.
15523 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15524 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15525 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15526 // But, that also updated the use in the freeze we just created, thus
15527 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15528 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15529 MaybePoisonOperand);
15530 }
15531 }
15532
15533 // This node has been merged with another.
15534 if (N->getOpcode() == ISD::DELETED_NODE)
15535 return SDValue(N, 0);
15536
15537 // The whole node may have been updated, so the value we were holding
15538 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15539 N0 = N->getOperand(0);
15540
15541 // Finally, recreate the node; its operands were updated to use
15542 // frozen operands, so we just need to use its "original" operands.
15543 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15544 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
15545 for (SDValue &Op : Ops) {
15546 if (Op.getOpcode() == ISD::UNDEF)
15547 Op = DAG.getFreeze(Op);
15548 }
15549 // NOTE: this strips poison generating flags.
15550 SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15551 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15552 "Can't create node that may be undef/poison!");
15553 return R;
15554}
15555
15556/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15557/// operands. DstEltVT indicates the destination element value type.
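/// For example, on a little-endian target, bitcasting
///   (v2i32 build_vector 1, 2)
/// to v4i16 yields (v4i16 build_vector 1, 0, 2, 0), and bitcasting it to v1i64
/// yields (v1i64 build_vector 0x0000000200000001).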
15558SDValue DAGCombiner::
15559ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15560 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15561
15562 // If this is already the right type, we're done.
15563 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15564
15565 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15566 unsigned DstBitSize = DstEltVT.getSizeInBits();
15567
15568 // If this is a conversion of N elements of one type to N elements of another
15569 // type, convert each element. This handles FP<->INT cases.
15570 if (SrcBitSize == DstBitSize) {
15571 SmallVector<SDValue, 8> Ops;
15572 for (SDValue Op : BV->op_values()) {
15573 // If the vector element type is not legal, the BUILD_VECTOR operands
15574 // are promoted and implicitly truncated. Make that explicit here.
15575 if (Op.getValueType() != SrcEltVT)
15576 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15577 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15578 AddToWorklist(Ops.back().getNode());
15579 }
15580 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15581 BV->getValueType(0).getVectorNumElements());
15582 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15583 }
15584
15585 // Otherwise, we're growing or shrinking the elements. To avoid having to
15586 // handle annoying details of growing/shrinking FP values, we convert them to
15587 // int first.
15588 if (SrcEltVT.isFloatingPoint()) {
15589 // Convert the input float vector to an int vector whose elements are the
15590 // same size.
15591 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15592 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15593 SrcEltVT = IntVT;
15594 }
15595
15596 // Now we know the input is an integer vector. If the output is an FP type,
15597 // convert to integer first, then to FP of the right size.
15598 if (DstEltVT.isFloatingPoint()) {
15599 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15600 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15601
15602 // Next, convert to FP elements of the same size.
15603 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15604 }
15605
15606 // Okay, we know the src/dst types are both integers of differing types.
15607 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15608
15609 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15610 // BuildVectorSDNode?
15611 auto *BVN = cast<BuildVectorSDNode>(BV);
15612
15613 // Extract the constant raw bit data.
15614 BitVector UndefElements;
15615 SmallVector<APInt> RawBits;
15616 bool IsLE = DAG.getDataLayout().isLittleEndian();
15617 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15618 return SDValue();
15619
15620 SDLoc DL(BV);
15621 SmallVector<SDValue, 8> Ops;
15622 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15623 if (UndefElements[I])
15624 Ops.push_back(DAG.getUNDEF(DstEltVT));
15625 else
15626 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15627 }
15628
15629 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15630 return DAG.getBuildVector(VT, DL, Ops);
15631}
15632
15633// Returns true if floating point contraction is allowed on the FMUL-SDValue
15634 // `N`.
15635 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15636 assert(N.getOpcode() == ISD::FMUL);
15637
15638 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15639 N->getFlags().hasAllowContract();
15640}
15641
15642// Returns true if `N` can assume no infinities involved in its computation.
15643 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15644 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15645}
15646
15647/// Try to perform FMA combining on a given FADD node.
15648template <class MatchContextClass>
15649SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15650 SDValue N0 = N->getOperand(0);
15651 SDValue N1 = N->getOperand(1);
15652 EVT VT = N->getValueType(0);
15653 SDLoc SL(N);
15654 MatchContextClass matcher(DAG, TLI, N);
15655 const TargetOptions &Options = DAG.getTarget().Options;
15656
15657 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15658
15659 // Floating-point multiply-add with intermediate rounding.
15660 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15661 // FIXME: Add VP_FMAD opcode.
15662 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15663
15664 // Floating-point multiply-add without intermediate rounding.
15665 bool HasFMA =
15666 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15667 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15668
15669 // No valid opcode, do not combine.
15670 if (!HasFMAD && !HasFMA)
15671 return SDValue();
15672
15673 bool CanReassociate =
15674 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15675 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15676 Options.UnsafeFPMath || HasFMAD);
15677 // If the addition is not contractable, do not combine.
15678 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15679 return SDValue();
15680
15681 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15682 // beneficial. It does not reduce latency. It increases register pressure. It
15683 // replaces an fadd with an fma which is a more complex instruction, so is
15684 // likely to have a larger encoding, use more functional units, etc.
15685 if (N0 == N1)
15686 return SDValue();
15687
15688 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15689 return SDValue();
15690
15691 // Always prefer FMAD to FMA for precision.
15692 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15693 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15694
15695 auto isFusedOp = [&](SDValue N) {
15696 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15697 };
15698
15699 // Is the node an FMUL and contractable either due to global flags or
15700 // SDNodeFlags.
15701 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15702 if (!matcher.match(N, ISD::FMUL))
15703 return false;
15704 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15705 };
15706 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15707 // prefer to fold the multiply with fewer uses.
15708 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
15709 if (N0->use_size() > N1->use_size())
15710 std::swap(N0, N1);
15711 }
15712
15713 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15714 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15715 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15716 N0.getOperand(1), N1);
15717 }
15718
15719 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15720 // Note: Commutes FADD operands.
15721 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15722 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15723 N1.getOperand(1), N0);
15724 }
15725
15726 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15727 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15728 // This also works with nested fma instructions:
15729 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
15730 // fma A, B, (fma C, D, (fma E, F, G))
15731 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
15732 // fma A, B, (fma C, D, (fma E, F, G)).
15733 // This requires reassociation because it changes the order of operations.
15734 if (CanReassociate) {
15735 SDValue FMA, E;
15736 if (isFusedOp(N0) && N0.hasOneUse()) {
15737 FMA = N0;
15738 E = N1;
15739 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15740 FMA = N1;
15741 E = N0;
15742 }
15743
15744 SDValue TmpFMA = FMA;
15745 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15746 SDValue FMul = TmpFMA->getOperand(2);
15747 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15748 SDValue C = FMul.getOperand(0);
15749 SDValue D = FMul.getOperand(1);
15750 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15751 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15752 // Replacing the inner FMul could cause the outer FMA to be simplified
15753 // away.
15754 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15755 }
15756
15757 TmpFMA = TmpFMA->getOperand(2);
15758 }
15759 }
15760
15761 // Look through FP_EXTEND nodes to do more combining.
15762
15763 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15764 if (matcher.match(N0, ISD::FP_EXTEND)) {
15765 SDValue N00 = N0.getOperand(0);
15766 if (isContractableFMUL(N00) &&
15767 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15768 N00.getValueType())) {
15769 return matcher.getNode(
15770 PreferredFusedOpcode, SL, VT,
15771 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15772 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15773 }
15774 }
15775
15776 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15777 // Note: Commutes FADD operands.
15778 if (matcher.match(N1, ISD::FP_EXTEND)) {
15779 SDValue N10 = N1.getOperand(0);
15780 if (isContractableFMUL(N10) &&
15781 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15782 N10.getValueType())) {
15783 return matcher.getNode(
15784 PreferredFusedOpcode, SL, VT,
15785 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15786 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15787 }
15788 }
15789
15790 // More folding opportunities when target permits.
15791 if (Aggressive) {
15792 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15793 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15794 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15795 SDValue Z) {
15796 return matcher.getNode(
15797 PreferredFusedOpcode, SL, VT, X, Y,
15798 matcher.getNode(PreferredFusedOpcode, SL, VT,
15799 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15800 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15801 };
15802 if (isFusedOp(N0)) {
15803 SDValue N02 = N0.getOperand(2);
15804 if (matcher.match(N02, ISD::FP_EXTEND)) {
15805 SDValue N020 = N02.getOperand(0);
15806 if (isContractableFMUL(N020) &&
15807 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15808 N020.getValueType())) {
15809 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15810 N020.getOperand(0), N020.getOperand(1),
15811 N1);
15812 }
15813 }
15814 }
15815
15816 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15817 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15818 // FIXME: This turns two single-precision and one double-precision
15819 // operation into two double-precision operations, which might not be
15820 // interesting for all targets, especially GPUs.
15821 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15822 SDValue Z) {
15823 return matcher.getNode(
15824 PreferredFusedOpcode, SL, VT,
15825 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15826 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15827 matcher.getNode(PreferredFusedOpcode, SL, VT,
15828 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15829 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15830 };
15831 if (N0.getOpcode() == ISD::FP_EXTEND) {
15832 SDValue N00 = N0.getOperand(0);
15833 if (isFusedOp(N00)) {
15834 SDValue N002 = N00.getOperand(2);
15835 if (isContractableFMUL(N002) &&
15836 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15837 N00.getValueType())) {
15838 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15839 N002.getOperand(0), N002.getOperand(1),
15840 N1);
15841 }
15842 }
15843 }
15844
15845 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
15846 // -> (fma y, z, (fma (fpext u), (fpext v), x))
15847 if (isFusedOp(N1)) {
15848 SDValue N12 = N1.getOperand(2);
15849 if (N12.getOpcode() == ISD::FP_EXTEND) {
15850 SDValue N120 = N12.getOperand(0);
15851 if (isContractableFMUL(N120) &&
15852 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15853 N120.getValueType())) {
15854 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15855 N120.getOperand(0), N120.getOperand(1),
15856 N0);
15857 }
15858 }
15859 }
15860
15861 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
15862 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15863 // FIXME: This turns two single-precision and one double-precision
15864 // operation into two double-precision operations, which might not be
15865 // interesting for all targets, especially GPUs.
15866 if (N1.getOpcode() == ISD::FP_EXTEND) {
15867 SDValue N10 = N1.getOperand(0);
15868 if (isFusedOp(N10)) {
15869 SDValue N102 = N10.getOperand(2);
15870 if (isContractableFMUL(N102) &&
15871 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15872 N10.getValueType())) {
15873 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15874 N102.getOperand(0), N102.getOperand(1),
15875 N0);
15876 }
15877 }
15878 }
15879 }
15880
15881 return SDValue();
15882}
15883
15884/// Try to perform FMA combining on a given FSUB node.
15885template <class MatchContextClass>
15886SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15887 SDValue N0 = N->getOperand(0);
15888 SDValue N1 = N->getOperand(1);
15889 EVT VT = N->getValueType(0);
15890 SDLoc SL(N);
15891 MatchContextClass matcher(DAG, TLI, N);
15892 const TargetOptions &Options = DAG.getTarget().Options;
15893
15894 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15895
15896 // Floating-point multiply-add with intermediate rounding.
15897 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15898 // FIXME: Add VP_FMAD opcode.
15899 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15900
15901 // Floating-point multiply-add without intermediate rounding.
15902 bool HasFMA =
15903 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15904 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15905
15906 // No valid opcode, do not combine.
15907 if (!HasFMAD && !HasFMA)
15908 return SDValue();
15909
15910 const SDNodeFlags Flags = N->getFlags();
15911 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15912 Options.UnsafeFPMath || HasFMAD);
15913
15914 // If the subtraction is not contractable, do not combine.
15915 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15916 return SDValue();
15917
15918 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15919 return SDValue();
15920
15921 // Always prefer FMAD to FMA for precision.
15922 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15923 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15924 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15925
15926 // Is the node an FMUL and contractable either due to global flags or
15927 // SDNodeFlags.
15928 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15929 if (!matcher.match(N, ISD::FMUL))
15930 return false;
15931 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15932 };
15933
15934 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15935 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15936 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15937 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15938 XY.getOperand(1),
15939 matcher.getNode(ISD::FNEG, SL, VT, Z));
15940 }
15941 return SDValue();
15942 };
15943
15944 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15945 // Note: Commutes FSUB operands.
15946 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15947 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15948 return matcher.getNode(
15949 PreferredFusedOpcode, SL, VT,
15950 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15951 YZ.getOperand(1), X);
15952 }
15953 return SDValue();
15954 };
15955
15956 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15957 // prefer to fold the multiply with fewer uses.
15958 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15959 (N0->use_size() > N1->use_size())) {
15960 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15961 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15962 return V;
15963 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15964 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15965 return V;
15966 } else {
15967 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15968 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15969 return V;
15970 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15971 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15972 return V;
15973 }
15974
15975 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
15976 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15977 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15978 SDValue N00 = N0.getOperand(0).getOperand(0);
15979 SDValue N01 = N0.getOperand(0).getOperand(1);
15980 return matcher.getNode(PreferredFusedOpcode, SL, VT,
15981 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15982 matcher.getNode(ISD::FNEG, SL, VT, N1));
15983 }
15984
15985 // Look through FP_EXTEND nodes to do more combining.
15986
15987 // fold (fsub (fpext (fmul x, y)), z)
15988 // -> (fma (fpext x), (fpext y), (fneg z))
15989 if (matcher.match(N0, ISD::FP_EXTEND)) {
15990 SDValue N00 = N0.getOperand(0);
15991 if (isContractableFMUL(N00) &&
15992 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15993 N00.getValueType())) {
15994 return matcher.getNode(
15995 PreferredFusedOpcode, SL, VT,
15996 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15997 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15998 matcher.getNode(ISD::FNEG, SL, VT, N1));
15999 }
16000 }
16001
16002 // fold (fsub x, (fpext (fmul y, z)))
16003 // -> (fma (fneg (fpext y)), (fpext z), x)
16004 // Note: Commutes FSUB operands.
16005 if (matcher.match(N1, ISD::FP_EXTEND)) {
16006 SDValue N10 = N1.getOperand(0);
16007 if (isContractableFMUL(N10) &&
16008 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16009 N10.getValueType())) {
16010 return matcher.getNode(
16011 PreferredFusedOpcode, SL, VT,
16012 matcher.getNode(
16013 ISD::FNEG, SL, VT,
16014 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16015 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16016 }
16017 }
16018
16019 // fold (fsub (fpext (fneg (fmul x, y))), z)
16020 // -> (fneg (fma (fpext x), (fpext y), z))
16021 // Note: This could be removed with appropriate canonicalization of the
16022 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16023 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16024 // from implementing the canonicalization in visitFSUB.
16025 if (matcher.match(N0, ISD::FP_EXTEND)) {
16026 SDValue N00 = N0.getOperand(0);
16027 if (matcher.match(N00, ISD::FNEG)) {
16028 SDValue N000 = N00.getOperand(0);
16029 if (isContractableFMUL(N000) &&
16030 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16031 N00.getValueType())) {
16032 return matcher.getNode(
16033 ISD::FNEG, SL, VT,
16034 matcher.getNode(
16035 PreferredFusedOpcode, SL, VT,
16036 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16037 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16038 N1));
16039 }
16040 }
16041 }
16042
16043 // fold (fsub (fneg (fpext (fmul x, y))), z)
16044 // -> (fneg (fma (fpext x), (fpext y), z))
16045 // Note: This could be removed with appropriate canonicalization of the
16046 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16047 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16048 // from implementing the canonicalization in visitFSUB.
16049 if (matcher.match(N0, ISD::FNEG)) {
16050 SDValue N00 = N0.getOperand(0);
16051 if (matcher.match(N00, ISD::FP_EXTEND)) {
16052 SDValue N000 = N00.getOperand(0);
16053 if (isContractableFMUL(N000) &&
16054 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16055 N000.getValueType())) {
16056 return matcher.getNode(
16057 ISD::FNEG, SL, VT,
16058 matcher.getNode(
16059 PreferredFusedOpcode, SL, VT,
16060 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16061 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16062 N1));
16063 }
16064 }
16065 }
16066
16067 auto isReassociable = [&Options](SDNode *N) {
16068 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16069 };
16070
16071 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16072 &isReassociable](SDValue N) {
16073 return isContractableFMUL(N) && isReassociable(N.getNode());
16074 };
16075
16076 auto isFusedOp = [&](SDValue N) {
16077 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16078 };
16079
16080 // More folding opportunities when target permits.
16081 if (Aggressive && isReassociable(N)) {
16082 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16083 // fold (fsub (fma x, y, (fmul u, v)), z)
16084 // -> (fma x, y, (fma u, v, (fneg z)))
16085 if (CanFuse && isFusedOp(N0) &&
16086 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16087 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16088 return matcher.getNode(
16089 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16090 matcher.getNode(PreferredFusedOpcode, SL, VT,
16091 N0.getOperand(2).getOperand(0),
16092 N0.getOperand(2).getOperand(1),
16093 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16094 }
16095
16096 // fold (fsub x, (fma y, z, (fmul u, v)))
16097 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16098 if (CanFuse && isFusedOp(N1) &&
16099 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16100 N1->hasOneUse() && NoSignedZero) {
16101 SDValue N20 = N1.getOperand(2).getOperand(0);
16102 SDValue N21 = N1.getOperand(2).getOperand(1);
16103 return matcher.getNode(
16104 PreferredFusedOpcode, SL, VT,
16105 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16106 N1.getOperand(1),
16107 matcher.getNode(PreferredFusedOpcode, SL, VT,
16108 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16109 }
16110
16111 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16112 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16113 if (isFusedOp(N0) && N0->hasOneUse()) {
16114 SDValue N02 = N0.getOperand(2);
16115 if (matcher.match(N02, ISD::FP_EXTEND)) {
16116 SDValue N020 = N02.getOperand(0);
16117 if (isContractableAndReassociableFMUL(N020) &&
16118 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16119 N020.getValueType())) {
16120 return matcher.getNode(
16121 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16122 matcher.getNode(
16123 PreferredFusedOpcode, SL, VT,
16124 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16125 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16126 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16127 }
16128 }
16129 }
16130
16131 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16132 // -> (fma (fpext x), (fpext y),
16133 // (fma (fpext u), (fpext v), (fneg z)))
16134 // FIXME: This turns two single-precision and one double-precision
16135 // operation into two double-precision operations, which might not be
16136 // interesting for all targets, especially GPUs.
16137 if (matcher.match(N0, ISD::FP_EXTEND)) {
16138 SDValue N00 = N0.getOperand(0);
16139 if (isFusedOp(N00)) {
16140 SDValue N002 = N00.getOperand(2);
16141 if (isContractableAndReassociableFMUL(N002) &&
16142 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16143 N00.getValueType())) {
16144 return matcher.getNode(
16145 PreferredFusedOpcode, SL, VT,
16146 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16147 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16148 matcher.getNode(
16149 PreferredFusedOpcode, SL, VT,
16150 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16151 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16152 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16153 }
16154 }
16155 }
16156
16157 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16158 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16159 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16160 N1->hasOneUse()) {
16161 SDValue N120 = N1.getOperand(2).getOperand(0);
16162 if (isContractableAndReassociableFMUL(N120) &&
16163 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16164 N120.getValueType())) {
16165 SDValue N1200 = N120.getOperand(0);
16166 SDValue N1201 = N120.getOperand(1);
16167 return matcher.getNode(
16168 PreferredFusedOpcode, SL, VT,
16169 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16170 N1.getOperand(1),
16171 matcher.getNode(
16172 PreferredFusedOpcode, SL, VT,
16173 matcher.getNode(ISD::FNEG, SL, VT,
16174 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16175 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16176 }
16177 }
16178
16179 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16180 // -> (fma (fneg (fpext y)), (fpext z),
16181 // (fma (fneg (fpext u)), (fpext v), x))
16182 // FIXME: This turns two single-precision and one double-precision
16183 // operation into two double-precision operations, which might not be
16184 // interesting for all targets, especially GPUs.
16185 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16186 SDValue CvtSrc = N1.getOperand(0);
16187 SDValue N100 = CvtSrc.getOperand(0);
16188 SDValue N101 = CvtSrc.getOperand(1);
16189 SDValue N102 = CvtSrc.getOperand(2);
16190 if (isContractableAndReassociableFMUL(N102) &&
16191 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16192 CvtSrc.getValueType())) {
16193 SDValue N1020 = N102.getOperand(0);
16194 SDValue N1021 = N102.getOperand(1);
16195 return matcher.getNode(
16196 PreferredFusedOpcode, SL, VT,
16197 matcher.getNode(ISD::FNEG, SL, VT,
16198 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16199 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16200 matcher.getNode(
16201 PreferredFusedOpcode, SL, VT,
16202 matcher.getNode(ISD::FNEG, SL, VT,
16203 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16204 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16205 }
16206 }
16207 }
16208
16209 return SDValue();
16210}
16211
16212/// Try to perform FMA combining on a given FMUL node based on the distributive
16213/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16214/// subtraction instead of addition).
16215SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16216 SDValue N0 = N->getOperand(0);
16217 SDValue N1 = N->getOperand(1);
16218 EVT VT = N->getValueType(0);
16219 SDLoc SL(N);
16220
16221 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16222
16223 const TargetOptions &Options = DAG.getTarget().Options;
16224
16225 // The transforms below are incorrect when x == 0 and y == inf, because the
16226 // intermediate multiplication produces a nan.
16227 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16228 if (!hasNoInfs(Options, FAdd))
16229 return SDValue();
16230
16231 // Floating-point multiply-add without intermediate rounding.
16232 bool HasFMA =
16233 isContractableFMUL(Options, SDValue(N, 0)) &&
16234 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16235 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16236
16237 // Floating-point multiply-add with intermediate rounding. This can result
16238 // in a less precise result due to the changed rounding order.
16239 bool HasFMAD = Options.UnsafeFPMath &&
16240 (LegalOperations && TLI.isFMADLegal(DAG, N));
16241
16242 // No valid opcode, do not combine.
16243 if (!HasFMAD && !HasFMA)
16244 return SDValue();
16245
16246 // Always prefer FMAD to FMA for precision.
16247 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16248 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16249
16250 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16251 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16252 auto FuseFADD = [&](SDValue X, SDValue Y) {
16253 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16254 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16255 if (C->isExactlyValue(+1.0))
16256 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16257 Y);
16258 if (C->isExactlyValue(-1.0))
16259 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16260 DAG.getNode(ISD::FNEG, SL, VT, Y));
16261 }
16262 }
16263 return SDValue();
16264 };
16265
16266 if (SDValue FMA = FuseFADD(N0, N1))
16267 return FMA;
16268 if (SDValue FMA = FuseFADD(N1, N0))
16269 return FMA;
16270
16271 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16272 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16273 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16274 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16275 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16276 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16277 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16278 if (C0->isExactlyValue(+1.0))
16279 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16280 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16281 Y);
16282 if (C0->isExactlyValue(-1.0))
16283 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16284 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16285 DAG.getNode(ISD::FNEG, SL, VT, Y));
16286 }
16287 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16288 if (C1->isExactlyValue(+1.0))
16289 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16290 DAG.getNode(ISD::FNEG, SL, VT, Y));
16291 if (C1->isExactlyValue(-1.0))
16292 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16293 Y);
16294 }
16295 }
16296 return SDValue();
16297 };
16298
16299 if (SDValue FMA = FuseFSUB(N0, N1))
16300 return FMA;
16301 if (SDValue FMA = FuseFSUB(N1, N0))
16302 return FMA;
16303
16304 return SDValue();
16305}
16306
16307SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16308 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16309
16310 // FADD -> FMA combines:
16311 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16312 if (Fused.getOpcode() != ISD::DELETED_NODE)
16313 AddToWorklist(Fused.getNode());
16314 return Fused;
16315 }
16316 return SDValue();
16317}
16318
16319SDValue DAGCombiner::visitFADD(SDNode *N) {
16320 SDValue N0 = N->getOperand(0);
16321 SDValue N1 = N->getOperand(1);
16322 SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16323 SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16324 EVT VT = N->getValueType(0);
16325 SDLoc DL(N);
16326 const TargetOptions &Options = DAG.getTarget().Options;
16327 SDNodeFlags Flags = N->getFlags();
16328 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16329
16330 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16331 return R;
16332
16333 // fold (fadd c1, c2) -> c1 + c2
16334 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16335 return C;
16336
16337 // canonicalize constant to RHS
16338 if (N0CFP && !N1CFP)
16339 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16340
16341 // fold vector ops
16342 if (VT.isVector())
16343 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16344 return FoldedVOp;
16345
16346 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16347 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16348 if (N1C && N1C->isZero())
16349 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16350 return N0;
16351
16352 if (SDValue NewSel = foldBinOpIntoSelect(N))
16353 return NewSel;
16354
16355 // fold (fadd A, (fneg B)) -> (fsub A, B)
16356 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16357 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16358 N1, DAG, LegalOperations, ForCodeSize))
16359 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16360
16361 // fold (fadd (fneg A), B) -> (fsub B, A)
16362 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16363 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16364 N0, DAG, LegalOperations, ForCodeSize))
16365 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16366
16367 auto isFMulNegTwo = [](SDValue FMul) {
16368 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16369 return false;
16370 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16371 return C && C->isExactlyValue(-2.0);
16372 };
16373
16374 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16375 if (isFMulNegTwo(N0)) {
16376 SDValue B = N0.getOperand(0);
16377 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16378 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16379 }
16380 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16381 if (isFMulNegTwo(N1)) {
16382 SDValue B = N1.getOperand(0);
16383 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16384 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16385 }
16386
16387 // No FP constant should be created after legalization as the Instruction
16388 // Selection pass has a hard time dealing with FP constants.
16389 bool AllowNewConst = (Level < AfterLegalizeDAG);
16390
16391 // If nnan is enabled, fold lots of things.
16392 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16393 // If allowed, fold (fadd (fneg x), x) -> 0.0
16394 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16395 return DAG.getConstantFP(0.0, DL, VT);
16396
16397 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16398 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16399 return DAG.getConstantFP(0.0, DL, VT);
16400 }
16401
16402 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16403 // TODO: break out portions of the transformations below for which Unsafe is
16404 // considered and which do not require both nsz and reassoc
16405 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16406 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16407 AllowNewConst) {
16408 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16409 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16410 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16411 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16412 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16413 }
16414
16415 // We can fold chains of FADD's of the same value into multiplications.
16416 // This transform is not safe in general because we are reducing the number
16417 // of rounding steps.
16418 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16419 if (N0.getOpcode() == ISD::FMUL) {
16420 SDNode *CFP00 =
16421 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16422 SDNode *CFP01 =
16423 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16424
16425 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16426 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16427 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16428 DAG.getConstantFP(1.0, DL, VT));
16429 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16430 }
16431
16432 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16433 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16434 N1.getOperand(0) == N1.getOperand(1) &&
16435 N0.getOperand(0) == N1.getOperand(0)) {
16436 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16437 DAG.getConstantFP(2.0, DL, VT));
16438 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16439 }
16440 }
16441
16442 if (N1.getOpcode() == ISD::FMUL) {
16443 SDNode *CFP10 =
16444 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16445 SDNode *CFP11 =
16446 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16447
16448 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16449 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16450 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16451 DAG.getConstantFP(1.0, DL, VT));
16452 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16453 }
16454
16455 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16456 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16457 N0.getOperand(0) == N0.getOperand(1) &&
16458 N1.getOperand(0) == N0.getOperand(0)) {
16459 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16460 DAG.getConstantFP(2.0, DL, VT));
16461 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16462 }
16463 }
16464
16465 if (N0.getOpcode() == ISD::FADD) {
16466 SDNode *CFP00 =
16467 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16468 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16469 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16470 (N0.getOperand(0) == N1)) {
16471 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16472 DAG.getConstantFP(3.0, DL, VT));
16473 }
16474 }
16475
16476 if (N1.getOpcode() == ISD::FADD) {
16477 SDNode *CFP10 =
16478 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16479 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16480 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16481 N1.getOperand(0) == N0) {
16482 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16483 DAG.getConstantFP(3.0, DL, VT));
16484 }
16485 }
16486
16487 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16488 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16489 N0.getOperand(0) == N0.getOperand(1) &&
16490 N1.getOperand(0) == N1.getOperand(1) &&
16491 N0.getOperand(0) == N1.getOperand(0)) {
16492 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16493 DAG.getConstantFP(4.0, DL, VT));
16494 }
16495 }
16496
16497 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16498 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16499 VT, N0, N1, Flags))
16500 return SD;
16501 } // enable-unsafe-fp-math
16502
16503 // FADD -> FMA combines:
16504 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16505 if (Fused.getOpcode() != ISD::DELETED_NODE)
16506 AddToWorklist(Fused.getNode());
16507 return Fused;
16508 }
16509 return SDValue();
16510}
16511
16512SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16513 SDValue Chain = N->getOperand(0);
16514 SDValue N0 = N->getOperand(1);
16515 SDValue N1 = N->getOperand(2);
16516 EVT VT = N->getValueType(0);
16517 EVT ChainVT = N->getValueType(1);
16518 SDLoc DL(N);
16519 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16520
16521 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16522 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16523 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16524 N1, DAG, LegalOperations, ForCodeSize)) {
16525 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16526 {Chain, N0, NegN1});
16527 }
16528
16529 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16530 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16531 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16532 N0, DAG, LegalOperations, ForCodeSize)) {
16533 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16534 {Chain, N1, NegN0});
16535 }
16536 return SDValue();
16537}
16538
16539SDValue DAGCombiner::visitFSUB(SDNode *N) {
16540 SDValue N0 = N->getOperand(0);
16541 SDValue N1 = N->getOperand(1);
16542 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16543 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16544 EVT VT = N->getValueType(0);
16545 SDLoc DL(N);
16546 const TargetOptions &Options = DAG.getTarget().Options;
16547 const SDNodeFlags Flags = N->getFlags();
16548 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16549
16550 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16551 return R;
16552
16553 // fold (fsub c1, c2) -> c1-c2
16554 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16555 return C;
16556
16557 // fold vector ops
16558 if (VT.isVector())
16559 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16560 return FoldedVOp;
16561
16562 if (SDValue NewSel = foldBinOpIntoSelect(N))
16563 return NewSel;
16564
16565 // (fsub A, 0) -> A
16566 if (N1CFP && N1CFP->isZero()) {
16567 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16568 Flags.hasNoSignedZeros()) {
16569 return N0;
16570 }
16571 }
16572
16573 if (N0 == N1) {
16574 // (fsub x, x) -> 0.0
16575 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16576 return DAG.getConstantFP(0.0f, DL, VT);
16577 }
16578
16579 // (fsub -0.0, N1) -> -N1
16580 if (N0CFP && N0CFP->isZero()) {
16581 if (N0CFP->isNegative() ||
16582 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16583 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16584 // flushed to zero, unless all users treat denorms as zero (DAZ).
16585 // FIXME: This transform will change the sign of a NaN and the behavior
16586 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16587 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16588 if (DenormMode == DenormalMode::getIEEE()) {
16589 if (SDValue NegN1 =
16590 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16591 return NegN1;
16592 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16593 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16594 }
16595 }
16596 }
16597
16598 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16599 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16600 N1.getOpcode() == ISD::FADD) {
16601 // X - (X + Y) -> -Y
16602 if (N0 == N1->getOperand(0))
16603 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16604 // X - (Y + X) -> -Y
16605 if (N0 == N1->getOperand(1))
16606 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16607 }
16608
16609 // fold (fsub A, (fneg B)) -> (fadd A, B)
16610 if (SDValue NegN1 =
16611 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16612 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16613
16614 // FSUB -> FMA combines:
16615 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16616 AddToWorklist(Fused.getNode());
16617 return Fused;
16618 }
16619
16620 return SDValue();
16621}
16622
16623// Transform IEEE Floats:
16624// (fmul C, (uitofp Pow2))
16625// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16626// (fdiv C, (uitofp Pow2))
16627// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16628//
16629 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
16630// there is no need for more than an add/sub.
16631//
16632// This is valid under the following circumstances:
16633// 1) We are dealing with IEEE floats
16634// 2) C is normal
16635// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16636 // TODO: Much of this could also be used for generating `ldexp` on targets
16637 // that prefer it.
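//
// A minimal standalone sketch of the f32 bit trick (a hypothetical helper,
// not part of this file; it assumes the operand and the result stay normal,
// which the checks below enforce):
//   float mulPow2(float C, unsigned K) {     // computes C * 2^K
//     uint32_t Bits;
//     std::memcpy(&Bits, &C, sizeof(Bits));  // bitcast_to_INT
//     Bits += K << 23;                       // 23 = f32 mantissa bits
//     std::memcpy(&C, &Bits, sizeof(C));     // bitcast_to_FP
//     return C;
//   }
// An fdiv by 2^K is the same sketch with a subtract instead of the add.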
16638SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16639 EVT VT = N->getValueType(0);
16640 SDValue ConstOp, Pow2Op;
16641
16642 std::optional<int> Mantissa;
16643 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16644 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16645 return false;
16646
16647 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16648 Pow2Op = N->getOperand(1 - ConstOpIdx);
16649 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16650 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16651 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16652 return false;
16653
16654 Pow2Op = Pow2Op.getOperand(0);
16655
16656 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16657 // TODO: We could use knownbits to make this bound more precise.
16658 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16659
16660 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16661 if (CFP == nullptr)
16662 return false;
16663
16664 const APFloat &APF = CFP->getValueAPF();
16665
16666 // Make sure we have a normal, IEEE-format constant.
16667 if (!APF.isNormal() || !APF.isIEEE())
16668 return false;
16669
16670 // Make sure the float's exponent is within the bounds for which this
16671 // transform produces a bitwise-equal value.
16672 int CurExp = ilogb(APF);
16673 // FMul by pow2 will only increase exponent.
16674 int MinExp =
16675 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16676 // FDiv by pow2 will only decrease exponent.
16677 int MaxExp =
16678 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16679 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16680 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16681 return false;
16682
16683 // Finally make sure we actually know the mantissa for the float type.
16684 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16685 if (!Mantissa)
16686 Mantissa = ThisMantissa;
16687
16688 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16689 };
16690
16691 // TODO: We may be able to include undefs.
16692 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16693 };
16694
16695 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16696 return SDValue();
16697
16698 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16699 return SDValue();
16700
16701 // Get log2 after all other checks have taken place. This is because
16702 // BuildLogBase2 may create a new node.
16703 SDLoc DL(N);
16704 // Get Log2 type with same bitwidth as the float type (VT).
16705 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16706 if (VT.isVector())
16707 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16708 VT.getVectorElementCount());
16709
16710 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16711 /*InexpensiveOnly*/ true, NewIntVT);
16712 if (!Log2)
16713 return SDValue();
16714
16715 // Perform actual transform.
16716 SDValue MantissaShiftCnt =
16717 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16718 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16719 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16720 // cast. We could handle that here by also folding through the casts.
16721 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16722 SDValue ResAsInt =
16723 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16724 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16725 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16726 return ResAsFP;
16727}
16728
16729SDValue DAGCombiner::visitFMUL(SDNode *N) {
16730 SDValue N0 = N->getOperand(0);
16731 SDValue N1 = N->getOperand(1);
16732 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16733 EVT VT = N->getValueType(0);
16734 SDLoc DL(N);
16735 const TargetOptions &Options = DAG.getTarget().Options;
16736 const SDNodeFlags Flags = N->getFlags();
16737 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16738
16739 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16740 return R;
16741
16742 // fold (fmul c1, c2) -> c1*c2
16743 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16744 return C;
16745
16746 // canonicalize constant to RHS
16747 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16748 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16749 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16750
16751 // fold vector ops
16752 if (VT.isVector())
16753 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16754 return FoldedVOp;
16755
16756 if (SDValue NewSel = foldBinOpIntoSelect(N))
16757 return NewSel;
16758
16759 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16760 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16761 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16762 N0.getOpcode() == ISD::FMUL) {
16763 SDValue N00 = N0.getOperand(0);
16764 SDValue N01 = N0.getOperand(1);
16765 // Avoid an infinite loop by making sure that N00 is not a constant
16766 // (the inner multiply has not been constant folded yet).
16767 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16768 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16769 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16770 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16771 }
16772 }
16773
16774 // Match a special-case: we convert X * 2.0 into fadd.
16775 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16776 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16777 N0.getOperand(0) == N0.getOperand(1)) {
16778 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16779 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16780 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16781 }
16782
16783 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16784 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16785 VT, N0, N1, Flags))
16786 return SD;
16787 }
16788
16789 // fold (fmul X, 2.0) -> (fadd X, X)
16790 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16791 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16792
16793 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16794 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16795 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16796 return DAG.getNode(ISD::FSUB, DL, VT,
16797 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16798 }
16799 }
16800
16801 // -N0 * -N1 --> N0 * N1
16802 TargetLowering::NegatibleCost CostN0 =
16803 TargetLowering::NegatibleCost::Expensive;
16804 TargetLowering::NegatibleCost CostN1 =
16805 TargetLowering::NegatibleCost::Expensive;
16806 SDValue NegN0 =
16807 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16808 if (NegN0) {
16809 HandleSDNode NegN0Handle(NegN0);
16810 SDValue NegN1 =
16811 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16812 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16813 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16814 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16815 }
16816
16817 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16818 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16819 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16820 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16821 TLI.isOperationLegal(ISD::FABS, VT)) {
16822 SDValue Select = N0, X = N1;
16823 if (Select.getOpcode() != ISD::SELECT)
16824 std::swap(Select, X);
16825
16826 SDValue Cond = Select.getOperand(0);
16827 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16828 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16829
16830 if (TrueOpnd && FalseOpnd &&
16831 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16832 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16833 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16834 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16835 switch (CC) {
16836 default: break;
16837 case ISD::SETOLT:
16838 case ISD::SETULT:
16839 case ISD::SETOLE:
16840 case ISD::SETULE:
16841 case ISD::SETLT:
16842 case ISD::SETLE:
16843 std::swap(TrueOpnd, FalseOpnd);
16844 [[fallthrough]];
16845 case ISD::SETOGT:
16846 case ISD::SETUGT:
16847 case ISD::SETOGE:
16848 case ISD::SETUGE:
16849 case ISD::SETGT:
16850 case ISD::SETGE:
16851 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16852 TLI.isOperationLegal(ISD::FNEG, VT))
16853 return DAG.getNode(ISD::FNEG, DL, VT,
16854 DAG.getNode(ISD::FABS, DL, VT, X));
16855 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16856 return DAG.getNode(ISD::FABS, DL, VT, X);
16857
16858 break;
16859 }
16860 }
16861 }
16862
16863 // FMUL -> FMA combines:
16864 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16865 AddToWorklist(Fused.getNode());
16866 return Fused;
16867 }
16868
16869 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
16870 // able to run.
16871 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16872 return R;
16873
16874 return SDValue();
16875}
16876
16877template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16878 SDValue N0 = N->getOperand(0);
16879 SDValue N1 = N->getOperand(1);
16880 SDValue N2 = N->getOperand(2);
16881 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16882 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16883 EVT VT = N->getValueType(0);
16884 SDLoc DL(N);
16885 const TargetOptions &Options = DAG.getTarget().Options;
16886 // FMA nodes have flags that propagate to the created nodes.
16887 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16888 MatchContextClass matcher(DAG, TLI, N);
16889
16890 bool CanReassociate =
16891 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16892
16893 // Constant fold FMA.
16894 if (isa<ConstantFPSDNode>(N0) &&
16895 isa<ConstantFPSDNode>(N1) &&
16896 isa<ConstantFPSDNode>(N2)) {
16897 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16898 }
16899
16900 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16901 TargetLowering::NegatibleCost CostN0 =
16902 TargetLowering::NegatibleCost::Expensive;
16903 TargetLowering::NegatibleCost CostN1 =
16904 TargetLowering::NegatibleCost::Expensive;
16905 SDValue NegN0 =
16906 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16907 if (NegN0) {
16908 HandleSDNode NegN0Handle(NegN0);
16909 SDValue NegN1 =
16910 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16911 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16912 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16913 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16914 }
16915
16916 // FIXME: use fast math flags instead of Options.UnsafeFPMath
16917 if (Options.UnsafeFPMath) {
16918 if (N0CFP && N0CFP->isZero())
16919 return N2;
16920 if (N1CFP && N1CFP->isZero())
16921 return N2;
16922 }
16923
16924 // FIXME: Support splat of constant.
16925 if (N0CFP && N0CFP->isExactlyValue(1.0))
16926 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16927 if (N1CFP && N1CFP->isExactlyValue(1.0))
16928 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16929
16930 // Canonicalize (fma c, x, y) -> (fma x, c, y)
16931 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16932 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16933 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16934
16935 if (CanReassociate) {
16936 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16937 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16938 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16939 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16940 return matcher.getNode(
16941 ISD::FMUL, DL, VT, N0,
16942 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16943 }
16944
16945 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16946 if (matcher.match(N0, ISD::FMUL) &&
16947 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16948 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16949 return matcher.getNode(
16950 ISD::FMA, DL, VT, N0.getOperand(0),
16951 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16952 }
16953 }
16954
16955 // (fma x, -1, y) -> (fadd (fneg x), y)
16956 // FIXME: Support splat of constant.
16957 if (N1CFP) {
16958 if (N1CFP->isExactlyValue(1.0))
16959 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16960
16961 if (N1CFP->isExactlyValue(-1.0) &&
16962 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16963 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16964 AddToWorklist(RHSNeg.getNode());
16965 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16966 }
16967
16968 // fma (fneg x), K, y -> fma x, -K, y
16969 if (matcher.match(N0, ISD::FNEG) &&
16970 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16971 (N1.hasOneUse() &&
16972 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16973 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16974 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16975 }
16976 }
16977
16978 // FIXME: Support splat of constant.
16979 if (CanReassociate) {
16980 // (fma x, c, x) -> (fmul x, (c+1))
16981 if (N1CFP && N0 == N2) {
16982 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16983 matcher.getNode(ISD::FADD, DL, VT, N1,
16984 DAG.getConstantFP(1.0, DL, VT)));
16985 }
16986
16987 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16988 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16989 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16990 matcher.getNode(ISD::FADD, DL, VT, N1,
16991 DAG.getConstantFP(-1.0, DL, VT)));
16992 }
16993 }
16994
16995 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16996 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16997 if (!TLI.isFNegFree(VT))
16998 if (SDValue Neg = TLI.getCheaperNegatedExpression(
16999 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17000 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17001 return SDValue();
17002}
17003
17004SDValue DAGCombiner::visitFMAD(SDNode *N) {
17005 SDValue N0 = N->getOperand(0);
17006 SDValue N1 = N->getOperand(1);
17007 SDValue N2 = N->getOperand(2);
17008 EVT VT = N->getValueType(0);
17009 SDLoc DL(N);
17010
17011 // Constant fold FMAD.
17012 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17013 isa<ConstantFPSDNode>(N2))
17014 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17015
17016 return SDValue();
17017}
17018
17019// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17020// reciprocal.
17021// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17022// Notice that this is not always beneficial. One reason is different targets
17023// may have different costs for FDIV and FMUL, so sometimes the cost of two
17024// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17025// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17026SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17027 // TODO: Limit this transform based on optsize/minsize - it always creates at
17028 // least 1 extra instruction. But the perf win may be substantial enough
17029 // that only minsize should restrict this.
17030 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17031 const SDNodeFlags Flags = N->getFlags();
17032 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17033 return SDValue();
17034
17035 // Skip if current node is a reciprocal/fneg-reciprocal.
17036 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17037 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17038 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17039 return SDValue();
17040
17041 // Exit early if the target does not want this transform or if there can't
17042 // possibly be enough uses of the divisor to make the transform worthwhile.
17043 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17044
17045 // For splat vectors, scale the number of uses by the splat factor. If we can
17046 // convert the division into a scalar op, that will likely be much faster.
17047 unsigned NumElts = 1;
17048 EVT VT = N->getValueType(0);
17049 if (VT.isVector() && DAG.isSplatValue(N1))
17050 NumElts = VT.getVectorMinNumElements();
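// E.g. a single FDIV whose divisor is a splatted <4 x float> value counts as
// four uses here, on the assumption that scalarizing it would yield four
// scalar divides sharing one reciprocal.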
17051
17052 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17053 return SDValue();
17054
17055 // Find all FDIV users of the same divisor.
17056 // Use a set because duplicates may be present in the user list.
17057 SetVector<SDNode *> Users;
17058 for (auto *U : N1->uses()) {
17059 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17060 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17061 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17062 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17063 U->getFlags().hasAllowReassociation() &&
17064 U->getFlags().hasNoSignedZeros())
17065 continue;
17066
17067 // This division is eligible for optimization only if global unsafe math
17068 // is enabled or if this division allows reciprocal formation.
17069 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17070 Users.insert(U);
17071 }
17072 }
17073
17074 // Now that we have the actual number of divisor uses, make sure it meets
17075 // the minimum threshold specified by the target.
17076 if ((Users.size() * NumElts) < MinUses)
17077 return SDValue();
17078
17079 SDLoc DL(N);
17080 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17081 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17082
17083 // Dividend / Divisor -> Dividend * Reciprocal
17084 for (auto *U : Users) {
17085 SDValue Dividend = U->getOperand(0);
17086 if (Dividend != FPOne) {
17087 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17088 Reciprocal, Flags);
17089 CombineTo(U, NewNode);
17090 } else if (U != Reciprocal.getNode()) {
17091 // In the absence of fast-math-flags, this user node is always the
17092 // same node as Reciprocal, but with FMF they may be different nodes.
17093 CombineTo(U, Reciprocal);
17094 }
17095 }
17096 return SDValue(N, 0); // N was replaced.
17097}
17098
17099SDValue DAGCombiner::visitFDIV(SDNode *N) {
17100 SDValue N0 = N->getOperand(0);
17101 SDValue N1 = N->getOperand(1);
17102 EVT VT = N->getValueType(0);
17103 SDLoc DL(N);
17104 const TargetOptions &Options = DAG.getTarget().Options;
17105 SDNodeFlags Flags = N->getFlags();
17106 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17107
17108 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17109 return R;
17110
17111 // fold (fdiv c1, c2) -> c1/c2
17112 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17113 return C;
17114
17115 // fold vector ops
17116 if (VT.isVector())
17117 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17118 return FoldedVOp;
17119
17120 if (SDValue NewSel = foldBinOpIntoSelect(N))
17121 return NewSel;
17122
17123 if (SDValue V = combineRepeatedFPDivisors(N))
17124 return V;
17125
17126 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17127 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
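// E.g. X / 4.0 becomes X * 0.25 (exact, since 0.25 is representable), while
// X / 3.0 becomes X * (1.0 / 3.0), which rounds the reciprocal and is
// therefore only done under the reciprocal fast-math conditions above.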
17128 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17129 // Compute the reciprocal 1.0 / c2.
17130 const APFloat &N1APF = N1CFP->getValueAPF();
17131 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17132 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17133 // Only do the transform if the reciprocal is a legal fp immediate that
17134 // isn't too nasty (eg NaN, denormal, ...).
17135 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17136 (!LegalOperations ||
17137 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17138 // backend)... we should handle this gracefully after Legalize.
17139 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17140 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17141 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17142 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17143 DAG.getConstantFP(Recip, DL, VT));
17144 }
17145
17146 // If this FDIV is part of a reciprocal square root, it may be folded
17147 // into a target-specific square root estimate instruction.
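// E.g. X / sqrt(Y) can become X * rsqrt_estimate(Y), where the estimate is a
// target-provided approximation, typically refined with Newton-Raphson steps.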
17148 if (N1.getOpcode() == ISD::FSQRT) {
17149 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17150 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17151 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17152 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17153 if (SDValue RV =
17154 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17155 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17156 AddToWorklist(RV.getNode());
17157 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17158 }
17159 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17160 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17161 if (SDValue RV =
17162 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17163 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17164 AddToWorklist(RV.getNode());
17165 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17166 }
17167 } else if (N1.getOpcode() == ISD::FMUL) {
17168 // Look through an FMUL. Even though this won't remove the FDIV directly,
17169 // it's still worthwhile to get rid of the FSQRT if possible.
17170 SDValue Sqrt, Y;
17171 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17172 Sqrt = N1.getOperand(0);
17173 Y = N1.getOperand(1);
17174 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17175 Sqrt = N1.getOperand(1);
17176 Y = N1.getOperand(0);
17177 }
17178 if (Sqrt.getNode()) {
17179 // If the other multiply operand is known positive, pull it into the
17180 // sqrt. That will eliminate the division if we convert to an estimate.
17181 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17182 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17183 SDValue A;
17184 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17185 A = Y.getOperand(0);
17186 else if (Y == Sqrt.getOperand(0))
17187 A = Y;
17188 if (A) {
17189 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17190 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17191 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17192 SDValue AAZ =
17193 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17194 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17195 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17196
17197 // Estimate creation failed. Clean up speculatively created nodes.
17198 recursivelyDeleteUnusedNodes(AAZ.getNode());
17199 }
17200 }
17201
17202 // We found a FSQRT, so try to make this fold:
17203 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17204 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17205 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17206 AddToWorklist(Div.getNode());
17207 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17208 }
17209 }
17210 }
17211
17212 // Fold into a reciprocal estimate and multiply instead of a real divide.
17213 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17214 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17215 return RV;
17216 }
17217
17218 // Fold X/Sqrt(X) -> Sqrt(X)
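// The identity x / sqrt(x) == sqrt(x) holds for positive x; at x == 0.0 the
// left side is 0/0 (NaN) while the right side is +0.0, hence the fast-math
// flag requirements below.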
17219 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17220 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17221 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17222 return N1;
17223
17224 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17225 TargetLowering::NegatibleCost CostN0 =
17226 TargetLowering::NegatibleCost::Expensive;
17227 TargetLowering::NegatibleCost CostN1 =
17228 TargetLowering::NegatibleCost::Expensive;
17229 SDValue NegN0 =
17230 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17231 if (NegN0) {
17232 HandleSDNode NegN0Handle(NegN0);
17233 SDValue NegN1 =
17234 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17235 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17236 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17237 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17238 }
17239
17240 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17241 return R;
17242
17243 return SDValue();
17244}
17245
17246SDValue DAGCombiner::visitFREM(SDNode *N) {
17247 SDValue N0 = N->getOperand(0);
17248 SDValue N1 = N->getOperand(1);
17249 EVT VT = N->getValueType(0);
17250 SDNodeFlags Flags = N->getFlags();
17251 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17252
17253 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17254 return R;
17255
17256 // fold (frem c1, c2) -> fmod(c1,c2)
17257 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17258 return C;
17259
17260 if (SDValue NewSel = foldBinOpIntoSelect(N))
17261 return NewSel;
17262
17263 return SDValue();
17264}
17265
17266SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17267 SDNodeFlags Flags = N->getFlags();
17268 const TargetOptions &Options = DAG.getTarget().Options;
17269
17270 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17271 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17272 if (!Flags.hasApproximateFuncs() ||
17273 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17274 return SDValue();
17275
17276 SDValue N0 = N->getOperand(0);
17277 if (TLI.isFsqrtCheap(N0, DAG))
17278 return SDValue();
17279
17280 // FSQRT nodes have flags that propagate to the created nodes.
17281 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17282 // transform the fdiv, we may produce a sub-optimal estimate sequence
17283 // because the reciprocal calculation may not have to filter out a
17284 // 0.0 input.
17285 return buildSqrtEstimate(N0, Flags);
17286}
17287
17288/// copysign(x, fp_extend(y)) -> copysign(x, y)
17289/// copysign(x, fp_round(y)) -> copysign(x, y)
17290/// Operands to the functions are the type of X and Y respectively.
17291static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17292 // Always fold no-op FP casts.
17293 if (XTy == YTy)
17294 return true;
17295
17296 // Do not optimize out type conversion of f128 type yet.
17297 // For some targets like x86_64, configuration is changed to keep one f128
17298 // value in one SSE register, but instruction selection cannot handle
17299 // FCOPYSIGN on SSE registers yet.
17300 if (YTy == MVT::f128)
17301 return false;
17302
17303 return true;
17304}
17305
17306 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17307 SDValue N1 = N->getOperand(1);
17308 if (N1.getOpcode() != ISD::FP_EXTEND &&
17309 N1.getOpcode() != ISD::FP_ROUND)
17310 return false;
17311 EVT N1VT = N1->getValueType(0);
17312 EVT N1Op0VT = N1->getOperand(0).getValueType();
17313 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17314}
17315
17316SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17317 SDValue N0 = N->getOperand(0);
17318 SDValue N1 = N->getOperand(1);
17319 EVT VT = N->getValueType(0);
17320
17321 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17322 if (SDValue C =
17323 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17324 return C;
17325
17326 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17327 const APFloat &V = N1C->getValueAPF();
17328 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17329 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17330 if (!V.isNegative()) {
17331 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17332 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17333 } else {
17334 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17335 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17336 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17337 }
17338 }
17339
17340 // copysign(fabs(x), y) -> copysign(x, y)
17341 // copysign(fneg(x), y) -> copysign(x, y)
17342 // copysign(copysign(x,z), y) -> copysign(x, y)
17343 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17344 N0.getOpcode() == ISD::FCOPYSIGN)
17345 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17346
17347 // copysign(x, abs(y)) -> abs(x)
17348 if (N1.getOpcode() == ISD::FABS)
17349 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17350
17351 // copysign(x, copysign(y,z)) -> copysign(x, z)
17352 if (N1.getOpcode() == ISD::FCOPYSIGN)
17353 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17354
17355 // copysign(x, fp_extend(y)) -> copysign(x, y)
17356 // copysign(x, fp_round(y)) -> copysign(x, y)
17357 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17358 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17359
17360 return SDValue();
17361}
17362
17363SDValue DAGCombiner::visitFPOW(SDNode *N) {
17364 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17365 if (!ExponentC)
17366 return SDValue();
17367 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17368
17369 // Try to convert x ** (1/3) into cube root.
17370 // TODO: Handle the various flavors of long double.
17371 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17372 // Some range near 1/3 should be fine.
17373 EVT VT = N->getValueType(0);
17374 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17375 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17376 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17377 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17378 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
17379 // For regular numbers, rounding may cause the results to differ.
17380 // Therefore, we require { nsz ninf nnan afn } for this transform.
17381 // TODO: We could select out the special cases if we don't have nsz/ninf.
17382 SDNodeFlags Flags = N->getFlags();
17383 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17384 !Flags.hasApproximateFuncs())
17385 return SDValue();
17386
17387 // Do not create a cbrt() libcall if the target does not have it, and do not
17388 // turn a pow that has lowering support into a cbrt() libcall.
17389 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17390 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17391 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17392 return SDValue();
17393
17394 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17395 }
17396
17397 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17398 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17399 // TODO: This could be extended (using a target hook) to handle smaller
17400 // power-of-2 fractional exponents.
17401 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17402 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17403 if (ExponentIs025 || ExponentIs075) {
17404 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17405 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17406 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17407 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17408 // For regular numbers, rounding may cause the results to differ.
17409 // Therefore, we require { nsz ninf afn } for this transform.
17410 // TODO: We could select out the special cases if we don't have nsz/ninf.
17411 SDNodeFlags Flags = N->getFlags();
17412
17413 // We only need no signed zeros for the 0.25 case.
17414 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17415 !Flags.hasApproximateFuncs())
17416 return SDValue();
17417
17418 // Don't double the number of libcalls. We are trying to inline fast code.
17419 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17420 return SDValue();
17421
17422 // Assume that libcalls are the smallest code.
17423 // TODO: This restriction should probably be lifted for vectors.
17424 if (ForCodeSize)
17425 return SDValue();
17426
17427 // pow(X, 0.25) --> sqrt(sqrt(X))
17428 SDLoc DL(N);
17429 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17430 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17431 if (ExponentIs025)
17432 return SqrtSqrt;
17433 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17434 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17435 }
17436
17437 return SDValue();
17438}
17439
17440 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17441 const TargetLowering &TLI) {
17442 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17443 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17444 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17445 // conversions would return +0.0.
17446 // FIXME: We should be able to use node-level FMF here.
17447 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17448 EVT VT = N->getValueType(0);
17449 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17450 !DAG.getTarget().Options.NoSignedZerosFPMath)
17451 return SDValue();
17452
17453 // fptosi/fptoui round towards zero, so converting from FP to integer and
17454 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17455 SDValue N0 = N->getOperand(0);
17456 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17457 N0.getOperand(0).getValueType() == VT)
17458 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17459
17460 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17461 N0.getOperand(0).getValueType() == VT)
17462 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17463
17464 return SDValue();
17465}
17466
17467SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17468 SDValue N0 = N->getOperand(0);
17469 EVT VT = N->getValueType(0);
17470 EVT OpVT = N0.getValueType();
17471
17472 // [us]itofp(undef) = 0, because the result value is bounded.
17473 if (N0.isUndef())
17474 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17475
17476 // fold (sint_to_fp c1) -> c1fp
17477 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17478 // ...but only if the target supports immediate floating-point values
17479 (!LegalOperations ||
17480 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17481 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17482
17483 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17484 // but UINT_TO_FP is legal on this target, try to convert.
17485 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17486 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17487 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17488 if (DAG.SignBitIsZero(N0))
17489 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17490 }
17491
17492 // The next optimizations are desirable only if SELECT_CC can be lowered.
17493 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17494 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17495 !VT.isVector() &&
17496 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17497 SDLoc DL(N);
17498 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17499 DAG.getConstantFP(0.0, DL, VT));
17500 }
17501
17502 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17503 // (select (setcc x, y, cc), 1.0, 0.0)
17504 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17505 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17506 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17507 SDLoc DL(N);
17508 return DAG.getSelect(DL, VT, N0.getOperand(0),
17509 DAG.getConstantFP(1.0, DL, VT),
17510 DAG.getConstantFP(0.0, DL, VT));
17511 }
17512
17513 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17514 return FTrunc;
17515
17516 return SDValue();
17517}
17518
17519SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17520 SDValue N0 = N->getOperand(0);
17521 EVT VT = N->getValueType(0);
17522 EVT OpVT = N0.getValueType();
17523
17524 // [us]itofp(undef) = 0, because the result value is bounded.
17525 if (N0.isUndef())
17526 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17527
17528 // fold (uint_to_fp c1) -> c1fp
17529 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17530 // ...but only if the target supports immediate floating-point values
17531 (!LegalOperations ||
17532 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17533 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17534
17535 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17536 // but SINT_TO_FP is legal on this target, try to convert.
17537 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17538 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17539 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17540 if (DAG.SignBitIsZero(N0))
17541 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17542 }
17543
17544 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17545 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17546 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17547 SDLoc DL(N);
17548 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17549 DAG.getConstantFP(0.0, DL, VT));
17550 }
17551
17552 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17553 return FTrunc;
17554
17555 return SDValue();
17556}
17557
17558 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17559 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17560 SDValue N0 = N->getOperand(0);
17561 EVT VT = N->getValueType(0);
17562
17563 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17564 return SDValue();
17565
17566 SDValue Src = N0.getOperand(0);
17567 EVT SrcVT = Src.getValueType();
17568 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17569 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17570
17571 // We can safely assume the conversion won't overflow the output range,
17572 // because (for example) (uint8_t)18293.f is undefined behavior.
17573
17574 // Since we can assume the conversion won't overflow, our decision as to
17575 // whether the input will fit in the float should depend on the minimum
17576 // of the input range and output range.
17577
17578 // This means this is also safe for a signed input and unsigned output, since
17579 // a negative input would lead to undefined behavior.
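//
// Worked example of the precision check below: for (fp_to_uint (uint_to_fp
// i16 X)) with an f32 intermediate, InputSize = 16 and OutputSize = 32, so
// ActualSize = 16 <= 24 (the f32 precision) and the pair folds to a zext of X.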
17580 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17581 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17582 unsigned ActualSize = std::min(InputSize, OutputSize);
17583 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17584
17585 // We can only fold away the float conversion if the input range can be
17586 // represented exactly in the float range.
17587 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17588 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17589 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17590 : ISD::ZERO_EXTEND;
17591 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17592 }
17593 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17594 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17595 return DAG.getBitcast(VT, Src);
17596 }
17597 return SDValue();
17598}
17599
17600SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17601 SDValue N0 = N->getOperand(0);
17602 EVT VT = N->getValueType(0);
17603
17604 // fold (fp_to_sint undef) -> undef
17605 if (N0.isUndef())
17606 return DAG.getUNDEF(VT);
17607
17608 // fold (fp_to_sint c1fp) -> c1
17609 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17610 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17611
17612 return FoldIntToFPToInt(N, DAG);
17613}
17614
17615SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17616 SDValue N0 = N->getOperand(0);
17617 EVT VT = N->getValueType(0);
17618
17619 // fold (fp_to_uint undef) -> undef
17620 if (N0.isUndef())
17621 return DAG.getUNDEF(VT);
17622
17623 // fold (fp_to_uint c1fp) -> c1
17624 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17625 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17626
17627 return FoldIntToFPToInt(N, DAG);
17628}
17629
17630SDValue DAGCombiner::visitXRINT(SDNode *N) {
17631 SDValue N0 = N->getOperand(0);
17632 EVT VT = N->getValueType(0);
17633
17634 // fold (lrint|llrint undef) -> undef
17635 if (N0.isUndef())
17636 return DAG.getUNDEF(VT);
17637
17638 // fold (lrint|llrint c1fp) -> c1
17639 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17640 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17641
17642 return SDValue();
17643}
17644
17645SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17646 SDValue N0 = N->getOperand(0);
17647 SDValue N1 = N->getOperand(1);
17648 EVT VT = N->getValueType(0);
17649
17650 // fold (fp_round c1fp) -> c1fp
17651 if (SDValue C =
17652 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17653 return C;
17654
17655 // fold (fp_round (fp_extend x)) -> x
17656 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17657 return N0.getOperand(0);
17658
17659 // fold (fp_round (fp_round x)) -> (fp_round x)
17660 if (N0.getOpcode() == ISD::FP_ROUND) {
17661 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17662 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17663
17664 // Avoid folding legal fp_rounds into non-legal ones.
17665 if (!hasOperation(ISD::FP_ROUND, VT))
17666 return SDValue();
17667
17668 // Skip this folding if it results in an fp_round from f80 to f16.
17669 //
17670 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17671 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17672 // instructions from f32 or f64. Moreover, the first (value-preserving)
17673 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17674 // x86.
17675 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17676 return SDValue();
17677
17678 // If the first fp_round isn't a value preserving truncation, it might
17679 // introduce a tie in the second fp_round that wouldn't occur in the
17680 // single-step fp_round we want to fold to.
17681 // In other words, double rounding isn't the same as rounding.
17682 // Also, this is a value preserving truncation iff both fp_round's are.
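// Decimal analogy of the hazard: rounding 2.549 first to one decimal gives
// 2.5, and rounding that to an integer (ties to even) gives 2, whereas
// rounding 2.549 to an integer in a single step gives 3.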
17683 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17684 SDLoc DL(N);
17685 return DAG.getNode(
17686 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17687 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17688 }
17689 }
17690
17691 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17692 // Note: From a legality perspective, this is a two step transform. First,
17693 // we duplicate the fp_round to the arguments of the copysign, then we
17694 // eliminate the fp_round on Y. The second step requires an additional
17695 // predicate to match the implementation above.
17696 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17697 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17698 N0.getValueType())) {
17699 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17700 N0.getOperand(0), N1);
17701 AddToWorklist(Tmp.getNode());
17702 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17703 Tmp, N0.getOperand(1));
17704 }
17705
17706 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17707 return NewVSel;
17708
17709 return SDValue();
17710}
17711
17712SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17713 SDValue N0 = N->getOperand(0);
17714 EVT VT = N->getValueType(0);
17715
17716 if (VT.isVector())
17717 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17718 return FoldedVOp;
17719
17720 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17721 if (N->hasOneUse() &&
17722 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17723 return SDValue();
17724
17725 // fold (fp_extend c1fp) -> c1fp
17726 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17727 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17728
17729 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17730 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17731 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17732 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17733
17734 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17735 // value of X.
17736 if (N0.getOpcode() == ISD::FP_ROUND
17737 && N0.getConstantOperandVal(1) == 1) {
17738 SDValue In = N0.getOperand(0);
17739 if (In.getValueType() == VT) return In;
17740 if (VT.bitsLT(In.getValueType()))
17741 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17742 In, N0.getOperand(1));
17743 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17744 }
17745
17746 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
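// That is, the extending FP load produces the wide value directly, and the
// original narrow value is rebuilt with an fp_round for the load's other
// users (see the CombineTo calls below).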
17747 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17748 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17749 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17750 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17751 LN0->getChain(),
17752 LN0->getBasePtr(), N0.getValueType(),
17753 LN0->getMemOperand());
17754 CombineTo(N, ExtLoad);
17755 CombineTo(
17756 N0.getNode(),
17757 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17758 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17759 ExtLoad.getValue(1));
17760 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17761 }
17762
17763 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17764 return NewVSel;
17765
17766 return SDValue();
17767}
17768
17769SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17770 SDValue N0 = N->getOperand(0);
17771 EVT VT = N->getValueType(0);
17772
17773 // fold (fceil c1) -> fceil(c1)
17774 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17775 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17776
17777 return SDValue();
17778}
17779
17780SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17781 SDValue N0 = N->getOperand(0);
17782 EVT VT = N->getValueType(0);
17783
17784 // fold (ftrunc c1) -> ftrunc(c1)
17785 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17786 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17787
17788 // fold ftrunc (known rounded int x) -> x
17789 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17790 // likely to be generated to extract integer from a rounded floating value.
17791 switch (N0.getOpcode()) {
17792 default: break;
17793 case ISD::FRINT:
17794 case ISD::FTRUNC:
17795 case ISD::FNEARBYINT:
17796 case ISD::FROUNDEVEN:
17797 case ISD::FFLOOR:
17798 case ISD::FCEIL:
17799 return N0;
17800 }
17801
17802 return SDValue();
17803}
17804
17805SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17806 SDValue N0 = N->getOperand(0);
17807
17808 // fold (ffrexp c1) -> ffrexp(c1)
17809 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17810 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17811 return SDValue();
17812}
17813
17814SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17815 SDValue N0 = N->getOperand(0);
17816 EVT VT = N->getValueType(0);
17817
17818 // fold (ffloor c1) -> ffloor(c1)
17819 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17820 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17821
17822 return SDValue();
17823}
17824
17825SDValue DAGCombiner::visitFNEG(SDNode *N) {
17826 SDValue N0 = N->getOperand(0);
17827 EVT VT = N->getValueType(0);
17828 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17829
17830 // Constant fold FNEG.
17831 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17832 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17833
17834 if (SDValue NegN0 =
17835 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17836 return NegN0;
17837
17838 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17839 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17840 // know it was called from a context with a nsz flag if the input fsub does
17841 // not.
17842 if (N0.getOpcode() == ISD::FSUB &&
17843 (DAG.getTarget().Options.NoSignedZerosFPMath ||
17844 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17845 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17846 N0.getOperand(0));
17847 }
17848
17849 if (SDValue Cast = foldSignChangeInBitcast(N))
17850 return Cast;
17851
17852 return SDValue();
17853}
17854
17855SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17856 SDValue N0 = N->getOperand(0);
17857 SDValue N1 = N->getOperand(1);
17858 EVT VT = N->getValueType(0);
17859 const SDNodeFlags Flags = N->getFlags();
17860 unsigned Opc = N->getOpcode();
17861 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17862 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17863 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17864
17865 // Constant fold.
17866 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17867 return C;
17868
17869 // Canonicalize to constant on RHS.
17870 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17871 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17872 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17873
17874 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17875 const APFloat &AF = N1CFP->getValueAPF();
17876
17877 // minnum(X, nan) -> X
17878 // maxnum(X, nan) -> X
17879 // minimum(X, nan) -> nan
17880 // maximum(X, nan) -> nan
17881 if (AF.isNaN())
17882 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17883
17884 // In the following folds, inf can be replaced with the largest finite
17885 // float, if the ninf flag is set.
17886 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17887 // minnum(X, -inf) -> -inf
17888 // maxnum(X, +inf) -> +inf
17889 // minimum(X, -inf) -> -inf if nnan
17890 // maximum(X, +inf) -> +inf if nnan
17891 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17892 return N->getOperand(1);
17893
17894 // minnum(X, +inf) -> X if nnan
17895 // maxnum(X, -inf) -> X if nnan
17896 // minimum(X, +inf) -> X
17897 // maximum(X, -inf) -> X
17898 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17899 return N->getOperand(0);
17900 }
17901 }
17902
17903 if (SDValue SD = reassociateReduction(
17904 PropagatesNaN
17905 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17906 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17907 Opc, SDLoc(N), VT, N0, N1, Flags))
17908 return SD;
17909
17910 return SDValue();
17911}
17912
17913SDValue DAGCombiner::visitFABS(SDNode *N) {
17914 SDValue N0 = N->getOperand(0);
17915 EVT VT = N->getValueType(0);
17916
17917 // fold (fabs c1) -> fabs(c1)
17918 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17919 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17920
17921 // fold (fabs (fabs x)) -> (fabs x)
17922 if (N0.getOpcode() == ISD::FABS)
17923 return N->getOperand(0);
17924
17925 // fold (fabs (fneg x)) -> (fabs x)
17926 // fold (fabs (fcopysign x, y)) -> (fabs x)
17927 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17928 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17929
17930 if (SDValue Cast = foldSignChangeInBitcast(N))
17931 return Cast;
17932
17933 return SDValue();
17934}
17935
17936SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17937 SDValue Chain = N->getOperand(0);
17938 SDValue N1 = N->getOperand(1);
17939 SDValue N2 = N->getOperand(2);
17940
17941 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17942 // nondeterministic jumps).
17943 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17944 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17945 N1->getOperand(0), N2);
17946 }
17947
17948 // Variant of the previous fold where there is a SETCC in between:
17949 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
17950 // =>
17951 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17952 // =>
17953 // BRCOND(SETCC(X, CONST, Cond))
17954 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17955 // isn't equivalent to true or false.
17956 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17957 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17958 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17959 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17960 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17961 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17962 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17963 bool Updated = false;
17964
17965 // Is 'X Cond C' always true or false?
17966 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17967 bool False = (Cond == ISD::SETULT && C->isZero()) ||
17968 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17969 (Cond == ISD::SETUGT && C->isAllOnes()) ||
17970 (Cond == ISD::SETGT && C->isMaxSignedValue());
17971 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17972 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17973 (Cond == ISD::SETUGE && C->isZero()) ||
17974 (Cond == ISD::SETGE && C->isMinSignedValue());
17975 return True || False;
17976 };
17977
17978 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17979 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17980 S0 = S0->getOperand(0);
17981 Updated = true;
17982 }
17983 }
17984 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17985 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17986 S1 = S1->getOperand(0);
17987 Updated = true;
17988 }
17989 }
17990
17991 if (Updated)
17992 return DAG.getNode(
17993 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17994 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17995 }
17996
17997 // If N is a constant we could fold this into a fallthrough or unconditional
17998 // branch. However that doesn't happen very often in normal code, because
17999 // Instcombine/SimplifyCFG should have handled the available opportunities.
18000 // If we did this folding here, it would be necessary to update the
18001 // MachineBasicBlock CFG, which is awkward.
18002
18003 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18004 // on the target.
18005 if (N1.getOpcode() == ISD::SETCC &&
18006 TLI.isOperationLegalOrCustom(ISD::BR_CC,
18007 N1.getOperand(0).getValueType())) {
18008 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18009 Chain, N1.getOperand(2),
18010 N1.getOperand(0), N1.getOperand(1), N2);
18011 }
18012
18013 if (N1.hasOneUse()) {
18014 // rebuildSetCC calls visitXor which may change the Chain when there is a
18015 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18016 HandleSDNode ChainHandle(Chain);
18017 if (SDValue NewN1 = rebuildSetCC(N1))
18018 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18019 ChainHandle.getValue(), NewN1, N2);
18020 }
18021
18022 return SDValue();
18023}
18024
18025SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18026 if (N.getOpcode() == ISD::SRL ||
18027 (N.getOpcode() == ISD::TRUNCATE &&
18028 (N.getOperand(0).hasOneUse() &&
18029 N.getOperand(0).getOpcode() == ISD::SRL))) {
18030 // Look past the truncate.
18031 if (N.getOpcode() == ISD::TRUNCATE)
18032 N = N.getOperand(0);
18033
18034 // Match this pattern so that we can generate simpler code:
18035 //
18036 // %a = ...
18037 // %b = and i32 %a, 2
18038 // %c = srl i32 %b, 1
18039 // brcond i32 %c ...
18040 //
18041 // into
18042 //
18043 // %a = ...
18044 // %b = and i32 %a, 2
18045 // %c = setcc eq %b, 0
18046 // brcond %c ...
18047 //
18048 // This applies only when the AND constant value has one bit set and the
18049 // SRL constant is equal to the log2 of the AND constant. The back-end is
18050 // smart enough to convert the result into a TEST/JMP sequence.
18051 SDValue Op0 = N.getOperand(0);
18052 SDValue Op1 = N.getOperand(1);
18053
18054 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18055 SDValue AndOp1 = Op0.getOperand(1);
18056
18057 if (AndOp1.getOpcode() == ISD::Constant) {
18058 const APInt &AndConst = AndOp1->getAsAPIntVal();
18059
18060 if (AndConst.isPowerOf2() &&
18061 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18062 SDLoc DL(N);
18063 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18064 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18065 ISD::SETNE);
18066 }
18067 }
18068 }
18069 }
18070
18071 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18072 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18073 if (N.getOpcode() == ISD::XOR) {
18074 // Because we may call this on a speculatively constructed
18075 // SimplifiedSetCC Node, we need to simplify this node first.
18076 // Ideally this should be folded into SimplifySetCC and not
18077 // here. For now, grab a handle to N so we don't lose it from
18078 // replacements internal to the visit.
18079 HandleSDNode XORHandle(N);
18080 while (N.getOpcode() == ISD::XOR) {
18081 SDValue Tmp = visitXOR(N.getNode());
18082 // No simplification done.
18083 if (!Tmp.getNode())
18084 break;
18085 // Returning N is a form of in-visit replacement that may invalidate
18086 // N. Grab the value from the handle.
18087 if (Tmp.getNode() == N.getNode())
18088 N = XORHandle.getValue();
18089 else // Node simplified. Try simplifying again.
18090 N = Tmp;
18091 }
18092
18093 if (N.getOpcode() != ISD::XOR)
18094 return N;
18095
18096 SDValue Op0 = N->getOperand(0);
18097 SDValue Op1 = N->getOperand(1);
18098
18099 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18100 bool Equal = false;
18101 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18102 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18103 Op0.getValueType() == MVT::i1) {
18104 N = Op0;
18105 Op0 = N->getOperand(0);
18106 Op1 = N->getOperand(1);
18107 Equal = true;
18108 }
18109
18110 EVT SetCCVT = N.getValueType();
18111 if (LegalTypes)
18112 SetCCVT = getSetCCResultType(SetCCVT);
18113 // Replace the uses of XOR with SETCC
18114 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18115 Equal ? ISD::SETEQ : ISD::SETNE);
18116 }
18117 }
18118
18119 return SDValue();
18120}
18121
18122// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18123//
18124SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18125 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18126 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18127
18128 // If N is a constant we could fold this into a fallthrough or unconditional
18129 // branch. However that doesn't happen very often in normal code, because
18130 // Instcombine/SimplifyCFG should have handled the available opportunities.
18131 // If we did this folding here, it would be necessary to update the
18132 // MachineBasicBlock CFG, which is awkward.
18133
18134 // Use SimplifySetCC to simplify SETCC's.
18135 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18136 CondLHS, CondRHS, CC->get(), SDLoc(N),
18137 false);
18138 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18139
18140 // fold to a simpler setcc
18141 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18142 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18143 N->getOperand(0), Simp.getOperand(2),
18144 Simp.getOperand(0), Simp.getOperand(1),
18145 N->getOperand(4));
18146
18147 return SDValue();
18148}
18149
18150static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18151 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18152 const TargetLowering &TLI) {
18153 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18154 if (LD->isIndexed())
18155 return false;
18156 EVT VT = LD->getMemoryVT();
18157 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18158 return false;
18159 Ptr = LD->getBasePtr();
18160 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18161 if (ST->isIndexed())
18162 return false;
18163 EVT VT = ST->getMemoryVT();
18164 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18165 return false;
18166 Ptr = ST->getBasePtr();
18167 IsLoad = false;
18168 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18169 if (LD->isIndexed())
18170 return false;
18171 EVT VT = LD->getMemoryVT();
18172 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18173 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18174 return false;
18175 Ptr = LD->getBasePtr();
18176 IsMasked = true;
18177 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18178 if (ST->isIndexed())
18179 return false;
18180 EVT VT = ST->getMemoryVT();
18181 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18182 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18183 return false;
18184 Ptr = ST->getBasePtr();
18185 IsLoad = false;
18186 IsMasked = true;
18187 } else {
18188 return false;
18189 }
18190 return true;
18191}
18192
18193/// Try turning a load/store into a pre-indexed load/store when the base
18194/// pointer is an add or subtract and it has other uses besides the load/store.
18195/// After the transformation, the new indexed load/store has effectively folded
18196/// the add/subtract in and all of its other uses are redirected to the
18197/// new load/store.
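/// For example (illustrative only, where the target supports it):
///   ptr2 = add ptr, 16
///   val  = load ptr2
///   ... other uses of ptr2 ...
/// can become a single pre-indexed load "val, ptr2' = pre_inc_load ptr, 16",
/// with the remaining uses of ptr2 redirected to the written-back value ptr2'.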
18198bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18199 if (Level < AfterLegalizeDAG)
18200 return false;
18201
18202 bool IsLoad = true;
18203 bool IsMasked = false;
18204 SDValue Ptr;
18205 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18206 Ptr, TLI))
18207 return false;
18208
18209 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18210 // out. There is no reason to make this a preinc/predec.
18211 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18212 Ptr->hasOneUse())
18213 return false;
18214
18215 // Ask the target to do addressing mode selection.
18216 SDValue BasePtr;
18217 SDValue Offset;
18218 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18219 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18220 return false;
18221
18222 // Backends without true r+i pre-indexed forms may need to pass a
18223 // constant base with a variable offset so that constant coercion
18224 // will work with the patterns in canonical form.
18225 bool Swapped = false;
18226 if (isa<ConstantSDNode>(BasePtr)) {
18227 std::swap(BasePtr, Offset);
18228 Swapped = true;
18229 }
18230
18231 // Don't create an indexed load / store with zero offset.
18232 if (isNullConstant(Offset))
18233 return false;
18234
18235 // Try turning it into a pre-indexed load / store except when:
18236 // 1) The new base ptr is a frame index.
18237 // 2) If N is a store and the new base ptr is either the same as or is a
18238 // predecessor of the value being stored.
18239 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18240 // that would create a cycle.
18241 // 4) All uses are load / store ops that use it as old base ptr.
18242
18243 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18244 // (plus the implicit offset) to a register to preinc anyway.
18245 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18246 return false;
18247
18248 // Check #2.
18249 if (!IsLoad) {
18250 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18251 : cast<StoreSDNode>(N)->getValue();
18252
18253 // Would require a copy.
18254 if (Val == BasePtr)
18255 return false;
18256
18257 // Would create a cycle.
18258 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18259 return false;
18260 }
18261
18262 // Caches for hasPredecessorHelper.
18263 SmallPtrSet<const SDNode *, 32> Visited;
18264 SmallVector<const SDNode *, 16> Worklist;
18265 Worklist.push_back(N);
18266
18267 // If the offset is a constant, there may be other adds of constants that
18268 // can be folded with this one. We should do this to avoid having to keep
18269 // a copy of the original base pointer.
18270 SmallVector<SDNode *, 16> OtherUses;
18271 constexpr unsigned int MaxSteps = 8192;
18272 if (isa<ConstantSDNode>(Offset))
18273 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18274 UE = BasePtr->use_end();
18275 UI != UE; ++UI) {
18276 SDUse &Use = UI.getUse();
18277 // Skip the use that is Ptr and uses of other results from BasePtr's
18278 // node (important for nodes that return multiple results).
18279 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18280 continue;
18281
18282 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18283 MaxSteps))
18284 continue;
18285
18286 if (Use.getUser()->getOpcode() != ISD::ADD &&
18287 Use.getUser()->getOpcode() != ISD::SUB) {
18288 OtherUses.clear();
18289 break;
18290 }
18291
18292 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18293 if (!isa<ConstantSDNode>(Op1)) {
18294 OtherUses.clear();
18295 break;
18296 }
18297
18298 // FIXME: In some cases, we can be smarter about this.
18299 if (Op1.getValueType() != Offset.getValueType()) {
18300 OtherUses.clear();
18301 break;
18302 }
18303
18304 OtherUses.push_back(Use.getUser());
18305 }
18306
18307 if (Swapped)
18308 std::swap(BasePtr, Offset);
18309
18310 // Now check for #3 and #4.
18311 bool RealUse = false;
18312
18313 for (SDNode *Use : Ptr->uses()) {
18314 if (Use == N)
18315 continue;
18316 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18317 return false;
18318
18319 // If Ptr may be folded in the addressing mode of another use, then it's
18320 // not profitable to do this transformation.
18321 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18322 RealUse = true;
18323 }
18324
18325 if (!RealUse)
18326 return false;
18327
18328 SDValue Result;
18329 if (!IsMasked) {
18330 if (IsLoad)
18331 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18332 else
18333 Result =
18334 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18335 } else {
18336 if (IsLoad)
18337 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18338 Offset, AM);
18339 else
18340 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18341 Offset, AM);
18342 }
18343 ++PreIndexedNodes;
18344 ++NodesCombined;
18345 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18346 Result.dump(&DAG); dbgs() << '\n');
18347 WorklistRemover DeadNodes(*this);
18348 if (IsLoad) {
18349 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18350 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18351 } else {
18352 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18353 }
18354
18355 // Finally, since the node is now dead, remove it from the graph.
18356 deleteAndRecombine(N);
18357
18358 if (Swapped)
18359 std::swap(BasePtr, Offset);
18360
18361 // Replace other uses of BasePtr that can be updated to use Ptr
18362 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18363 unsigned OffsetIdx = 1;
18364 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18365 OffsetIdx = 0;
18366 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18367 BasePtr.getNode() && "Expected BasePtr operand");
18368
18369 // We need to replace ptr0 in the following expression:
18370 // x0 * offset0 + y0 * ptr0 = t0
18371 // knowing that
18372 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18373 //
18374 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18375 // indexed load/store and the expression that needs to be re-written.
18376 //
18377 // Therefore, we have:
18378 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
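  // As a worked illustration (values chosen for exposition, not from the
  // source): for a PRE_INC form with Swapped == false we have x1 = y1 = 1, so
  // t1 = ptr0 + offset1. If the other use is "t0 = add ptr0, offset0", then
  // x0 = y0 = 1 and the formula gives t0 = (offset0 - offset1) + t1; e.g. with
  // offset1 = 4 and offset0 = 16 the use is rewritten as t0 = t1 + 12.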
18379
18380 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18381 const APInt &Offset0 = CN->getAPIntValue();
18382 const APInt &Offset1 = Offset->getAsAPIntVal();
18383 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18384 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18385 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18386 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18387
18388 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18389
18390 APInt CNV = Offset0;
18391 if (X0 < 0) CNV = -CNV;
18392 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18393 else CNV = CNV - Offset1;
18394
18395 SDLoc DL(OtherUses[i]);
18396
18397 // We can now generate the new expression.
18398 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18399 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18400
18401 SDValue NewUse = DAG.getNode(Opcode,
18402 DL,
18403 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18404 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18405 deleteAndRecombine(OtherUses[i]);
18406 }
18407
18408 // Replace the uses of Ptr with uses of the updated base value.
18409 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18410 deleteAndRecombine(Ptr.getNode());
18411 AddToWorklist(Result.getNode());
18412
18413 return true;
18414}
18415
18416static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18417 SDValue &BasePtr, SDValue &Offset,
18418 ISD::MemIndexedMode &AM,
18419 SelectionDAG &DAG,
18420 const TargetLowering &TLI) {
18421 if (PtrUse == N ||
18422 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18423 return false;
18424
18425 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18426 return false;
18427
18428 // Don't create an indexed load / store with zero offset.
18429 if (isNullConstant(Offset))
18430 return false;
18431
18432 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18433 return false;
18434
18435 SmallPtrSet<const SDNode *, 32> Visited;
18436 for (SDNode *Use : BasePtr->uses()) {
18437 if (Use == Ptr.getNode())
18438 continue;
18439
18440 // Don't combine if there's a later user which could perform the indexing instead.
18441 if (isa<MemSDNode>(Use)) {
18442 bool IsLoad = true;
18443 bool IsMasked = false;
18444 SDValue OtherPtr;
18445 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18446 IsMasked, OtherPtr, TLI)) {
18447 SmallVector<const SDNode *, 2> Worklist;
18448 Worklist.push_back(Use);
18449 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18450 return false;
18451 }
18452 }
18453
18454 // If all the uses are load / store addresses, then don't do the
18455 // transformation.
18456 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18457 for (SDNode *UseUse : Use->uses())
18458 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18459 return false;
18460 }
18461 }
18462 return true;
18463}
18464
18465static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18466 bool &IsMasked, SDValue &Ptr,
18467 SDValue &BasePtr, SDValue &Offset,
18468 ISD::MemIndexedMode &AM,
18469 SelectionDAG &DAG,
18470 const TargetLowering &TLI) {
18471 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18472 IsMasked, Ptr, TLI) ||
18473 Ptr->hasOneUse())
18474 return nullptr;
18475
18476 // Try turning it into a post-indexed load / store except when
18477 // 1) All uses are load / store ops that use it as base ptr (and
18478 // it may be folded as addressing mode).
18479 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18480 // nor a successor of N. Otherwise, if Op is folded that would
18481 // create a cycle.
18482 for (SDNode *Op : Ptr->uses()) {
18483 // Check for #1.
18484 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18485 continue;
18486
18487 // Check for #2.
18488 SmallPtrSet<const SDNode *, 32> Visited;
18489 SmallVector<const SDNode *, 8> Worklist;
18490 constexpr unsigned int MaxSteps = 8192;
18491 // Ptr is predecessor to both N and Op.
18492 Visited.insert(Ptr.getNode());
18493 Worklist.push_back(N);
18494 Worklist.push_back(Op);
18495 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18496 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18497 return Op;
18498 }
18499 return nullptr;
18500}
18501
18502/// Try to combine a load/store with an add/sub of the base pointer node into a
18503/// post-indexed load/store. The transformation effectively folds the add/subtract
18504/// into the new indexed load/store, and all of its uses are redirected to the
18505/// new load/store.
18506bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18507 if (Level < AfterLegalizeDAG)
18508 return false;
18509
18510 bool IsLoad = true;
18511 bool IsMasked = false;
18512 SDValue Ptr;
18513 SDValue BasePtr;
18514 SDValue Offset;
18515 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18516 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18517 Offset, AM, DAG, TLI);
18518 if (!Op)
18519 return false;
18520
18521 SDValue Result;
18522 if (!IsMasked)
18523 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18524 Offset, AM)
18525 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18526 BasePtr, Offset, AM);
18527 else
18528 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18529 BasePtr, Offset, AM)
18530 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18531 BasePtr, Offset, AM);
18532 ++PostIndexedNodes;
18533 ++NodesCombined;
18534 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18535 Result.dump(&DAG); dbgs() << '\n');
18536 WorklistRemover DeadNodes(*this);
18537 if (IsLoad) {
18538 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18539 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18540 } else {
18541 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18542 }
18543
18544 // Finally, since the node is now dead, remove it from the graph.
18545 deleteAndRecombine(N);
18546
18547 // Replace the uses of Use with uses of the updated base value.
18548 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18549 Result.getValue(IsLoad ? 1 : 0));
18550 deleteAndRecombine(Op);
18551 return true;
18552}
18553
18554/// Return the base-pointer arithmetic from an indexed \p LD.
18555SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18556 ISD::MemIndexedMode AM = LD->getAddressingMode();
18557 assert(AM != ISD::UNINDEXED);
18558 SDValue BP = LD->getOperand(1);
18559 SDValue Inc = LD->getOperand(2);
18560
18561 // Some backends use TargetConstants for load offsets, but don't expect
18562 // TargetConstants in general ADD nodes. We can convert these constants into
18563 // regular Constants (if the constant is not opaque).
18564 assert((Inc.getOpcode() != ISD::TargetConstant ||
18565 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18566 "Cannot split out indexing using opaque target constants");
18567 if (Inc.getOpcode() == ISD::TargetConstant) {
18568 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18569 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18570 ConstInc->getValueType(0));
18571 }
18572
18573 unsigned Opc =
18574 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18575 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18576}
18577
18578static inline ElementCount numVectorEltsOrZero(EVT T) {
18579 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18580}
18581
18582bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18583 EVT STType = Val.getValueType();
18584 EVT STMemType = ST->getMemoryVT();
18585 if (STType == STMemType)
18586 return true;
18587 if (isTypeLegal(STMemType))
18588 return false; // fail.
18589 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18590 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18591 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18592 return true;
18593 }
18594 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18595 STType.isInteger() && STMemType.isInteger()) {
18596 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18597 return true;
18598 }
18599 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18600 Val = DAG.getBitcast(STMemType, Val);
18601 return true;
18602 }
18603 return false; // fail.
18604}
18605
18606bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18607 EVT LDMemType = LD->getMemoryVT();
18608 EVT LDType = LD->getValueType(0);
18609 assert(Val.getValueType() == LDMemType &&
18610 "Attempting to extend value of non-matching type");
18611 if (LDType == LDMemType)
18612 return true;
18613 if (LDMemType.isInteger() && LDType.isInteger()) {
18614 switch (LD->getExtensionType()) {
18615 case ISD::NON_EXTLOAD:
18616 Val = DAG.getBitcast(LDType, Val);
18617 return true;
18618 case ISD::EXTLOAD:
18619 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18620 return true;
18621 case ISD::SEXTLOAD:
18622 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18623 return true;
18624 case ISD::ZEXTLOAD:
18625 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18626 return true;
18627 }
18628 }
18629 return false;
18630}
18631
18632StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18633 int64_t &Offset) {
18634 SDValue Chain = LD->getOperand(0);
18635
18636 // Look through CALLSEQ_START.
18637 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18638 Chain = Chain->getOperand(0);
18639
18640 StoreSDNode *ST = nullptr;
18641 SmallVector<SDValue, 8> Aliases;
18642 if (Chain.getOpcode() == ISD::TokenFactor) {
18643 // Look for unique store within the TokenFactor.
18644 for (SDValue Op : Chain->ops()) {
18645 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18646 if (!Store)
18647 continue;
18648 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18649 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18650 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18651 continue;
18652 // Make sure the store is not aliased with any nodes in TokenFactor.
18653 GatherAllAliases(Store, Chain, Aliases);
18654 if (Aliases.empty() ||
18655 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18656 ST = Store;
18657 break;
18658 }
18659 } else {
18660 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18661 if (Store) {
18662 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18663 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18664 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18665 ST = Store;
18666 }
18667 }
18668
18669 return ST;
18670}
18671
18672SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18673 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18674 return SDValue();
18675 SDValue Chain = LD->getOperand(0);
18676 int64_t Offset;
18677
18678 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18679 // TODO: Relax this restriction for unordered atomics (see D66309)
18680 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18681 return SDValue();
18682
18683 EVT LDType = LD->getValueType(0);
18684 EVT LDMemType = LD->getMemoryVT();
18685 EVT STMemType = ST->getMemoryVT();
18686 EVT STType = ST->getValue().getValueType();
18687
18688 // There are two cases to consider here:
18689 // 1. The store is fixed width and the load is scalable. In this case we
18690 // don't know at compile time if the store completely envelops the load
18691 // so we abandon the optimisation.
18692 // 2. The store is scalable and the load is fixed width. We could
18693 // potentially support a limited number of cases here, but there has been
18694 // no cost-benefit analysis to prove it's worth it.
18695 bool LdStScalable = LDMemType.isScalableVT();
18696 if (LdStScalable != STMemType.isScalableVT())
18697 return SDValue();
18698
18699 // If we are dealing with scalable vectors on a big endian platform the
18700 // calculation of offsets below becomes trickier, since we do not know at
18701 // compile time the absolute size of the vector. Until we've done more
18702 // analysis on big-endian platforms it seems better to bail out for now.
18703 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18704 return SDValue();
18705
18706 // Normalize for Endianness. After this Offset=0 will denote that the least
18707 // significant bit in the loaded value maps to the least significant bit in
18708 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18709 // n:th least significant byte of the stored value.
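  // For example (illustrative only): on a big-endian target, an i32 store
  // followed by an i8 load of the same address arrives here with OrigOffset == 0,
  // and the adjustment below yields Offset == 3, i.e. the load reads the most
  // significant byte of the stored i32, as expected for big-endian layout.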
18710 int64_t OrigOffset = Offset;
18711 if (DAG.getDataLayout().isBigEndian())
18712 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18713 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18714 8 -
18715 Offset;
18716
18717 // Check that the stored value covers all bits that are loaded.
18718 bool STCoversLD;
18719
18720 TypeSize LdMemSize = LDMemType.getSizeInBits();
18721 TypeSize StMemSize = STMemType.getSizeInBits();
18722 if (LdStScalable)
18723 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18724 else
18725 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18726 StMemSize.getFixedValue());
18727
18728 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18729 if (LD->isIndexed()) {
18730 // Cannot handle opaque target constants and we must respect the user's
18731 // request not to split indexes from loads.
18732 if (!canSplitIdx(LD))
18733 return SDValue();
18734 SDValue Idx = SplitIndexingFromLoad(LD);
18735 SDValue Ops[] = {Val, Idx, Chain};
18736 return CombineTo(LD, Ops, 3);
18737 }
18738 return CombineTo(LD, Val, Chain);
18739 };
18740
18741 if (!STCoversLD)
18742 return SDValue();
18743
18744 // Memory as copy space (potentially masked).
18745 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18746 // Simple case: Direct non-truncating forwarding
18747 if (LDType.getSizeInBits() == LdMemSize)
18748 return ReplaceLd(LD, ST->getValue(), Chain);
18749 // Can we model the truncate and extension with an and mask?
18750 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18751 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18752 // Mask to size of LDMemType
18753 auto Mask =
18754 DAG.getConstant(APInt::getLowBitsSet(LDType.getFixedSizeInBits(),
18755 StMemSize.getFixedValue()),
18756 SDLoc(ST), STType);
18757 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18758 return ReplaceLd(LD, Val, Chain);
18759 }
18760 }
18761
18762 // Handle some cases for big-endian that would be Offset 0 and handled for
18763 // little-endian.
18764 SDValue Val = ST->getValue();
18765 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18766 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18767 !LDType.isVector() && isTypeLegal(STType) &&
18768 TLI.isOperationLegal(ISD::SRL, STType)) {
18769 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18770 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18771 Offset = 0;
18772 }
18773 }
18774
18775 // TODO: Deal with nonzero offset.
18776 if (LD->getBasePtr().isUndef() || Offset != 0)
18777 return SDValue();
18778 // Model necessary truncations / extensions.
18779 // Truncate Value To Stored Memory Size.
18780 do {
18781 if (!getTruncatedStoreValue(ST, Val))
18782 continue;
18783 if (!isTypeLegal(LDMemType))
18784 continue;
18785 if (STMemType != LDMemType) {
18786 // TODO: Support vectors? This requires extract_subvector/bitcast.
18787 if (!STMemType.isVector() && !LDMemType.isVector() &&
18788 STMemType.isInteger() && LDMemType.isInteger())
18789 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18790 else
18791 continue;
18792 }
18793 if (!extendLoadedValueToExtension(LD, Val))
18794 continue;
18795 return ReplaceLd(LD, Val, Chain);
18796 } while (false);
18797
18798 // On failure, cleanup dead nodes we may have created.
18799 if (Val->use_empty())
18800 deleteAndRecombine(Val.getNode());
18801 return SDValue();
18802}
18803
18804SDValue DAGCombiner::visitLOAD(SDNode *N) {
18805 LoadSDNode *LD = cast<LoadSDNode>(N);
18806 SDValue Chain = LD->getChain();
18807 SDValue Ptr = LD->getBasePtr();
18808
18809 // If load is not volatile and there are no uses of the loaded value (and
18810 // the updated indexed value in case of indexed loads), change uses of the
18811 // chain value into uses of the chain input (i.e. delete the dead load).
18812 // TODO: Allow this for unordered atomics (see D66309)
18813 if (LD->isSimple()) {
18814 if (N->getValueType(1) == MVT::Other) {
18815 // Unindexed loads.
18816 if (!N->hasAnyUseOfValue(0)) {
18817 // It's not safe to use the two value CombineTo variant here. e.g.
18818 // v1, chain2 = load chain1, loc
18819 // v2, chain3 = load chain2, loc
18820 // v3 = add v2, c
18821 // Now we replace use of chain2 with chain1. This makes the second load
18822 // isomorphic to the one we are deleting, and thus makes this load live.
18823 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18824 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18825 dbgs() << "\n");
18826 WorklistRemover DeadNodes(*this);
18827 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18828 AddUsersToWorklist(Chain.getNode());
18829 if (N->use_empty())
18830 deleteAndRecombine(N);
18831
18832 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18833 }
18834 } else {
18835 // Indexed loads.
18836 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18837
18838 // If this load has an opaque TargetConstant offset, then we cannot split
18839 // the indexing into an add/sub directly (that TargetConstant may not be
18840 // valid for a different type of node, and we cannot convert an opaque
18841 // target constant into a regular constant).
18842 bool CanSplitIdx = canSplitIdx(LD);
18843
18844 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18845 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18846 SDValue Index;
18847 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18848 Index = SplitIndexingFromLoad(LD);
18849 // Try to fold the base pointer arithmetic into subsequent loads and
18850 // stores.
18851 AddUsersToWorklist(N);
18852 } else
18853 Index = DAG.getUNDEF(N->getValueType(1));
18854 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18855 dbgs() << "\nWith: "; Undef.dump(&DAG);
18856 dbgs() << " and 2 other values\n");
18857 WorklistRemover DeadNodes(*this);
18858 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18859 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18860 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18861 deleteAndRecombine(N);
18862 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18863 }
18864 }
18865 }
18866
18867 // If this load is directly stored, replace the load value with the stored
18868 // value.
18869 if (auto V = ForwardStoreValueToDirectLoad(LD))
18870 return V;
18871
18872 // Try to infer better alignment information than the load already has.
18873 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18874 !LD->isAtomic()) {
18875 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18876 if (*Alignment > LD->getAlign() &&
18877 isAligned(*Alignment, LD->getSrcValueOffset())) {
18878 SDValue NewLoad = DAG.getExtLoad(
18879 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18880 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18881 LD->getMemOperand()->getFlags(), LD->getAAInfo());
18882 // NewLoad will always be N as we are only refining the alignment
18883 assert(NewLoad.getNode() == N);
18884 (void)NewLoad;
18885 }
18886 }
18887 }
18888
18889 if (LD->isUnindexed()) {
18890 // Walk up chain skipping non-aliasing memory nodes.
18891 SDValue BetterChain = FindBetterChain(LD, Chain);
18892
18893 // If there is a better chain.
18894 if (Chain != BetterChain) {
18895 SDValue ReplLoad;
18896
18897 // Replace the chain to avoid the dependency.
18898 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18899 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18900 BetterChain, Ptr, LD->getMemOperand());
18901 } else {
18902 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18903 LD->getValueType(0),
18904 BetterChain, Ptr, LD->getMemoryVT(),
18905 LD->getMemOperand());
18906 }
18907
18908 // Create token factor to keep old chain connected.
18909 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18910 MVT::Other, Chain, ReplLoad.getValue(1));
18911
18912 // Replace uses with load result and token factor
18913 return CombineTo(N, ReplLoad.getValue(0), Token);
18914 }
18915 }
18916
18917 // Try transforming N to an indexed load.
18918 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18919 return SDValue(N, 0);
18920
18921 // Try to slice up N to more direct loads if the slices are mapped to
18922 // different register banks or pairing can take place.
18923 if (SliceUpLoad(N))
18924 return SDValue(N, 0);
18925
18926 return SDValue();
18927}
18928
18929namespace {
18930
18931/// Helper structure used to slice a load in smaller loads.
18932/// Basically a slice is obtained from the following sequence:
18933/// Origin = load Ty1, Base
18934/// Shift = srl Ty1 Origin, CstTy Amount
18935/// Inst = trunc Shift to Ty2
18936///
18937/// Then, it will be rewritten into:
18938/// Slice = load SliceTy, Base + SliceOffset
18939/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
18940///
18941/// SliceTy is deduced from the number of bits that are actually used to
18942/// build Inst.
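/// As a rough illustration (types chosen for exposition only): for
///   Origin = load i64, Base
///   Shift  = srl i64 Origin, 32
///   Inst   = trunc i64 Shift to i32
/// the slice is an i32 load from Base + 4 on a little-endian target (Base + 0
/// on a big-endian one), and no zext is needed since SliceTy == Ty2 == i32.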
18943struct LoadedSlice {
18944 /// Helper structure used to compute the cost of a slice.
18945 struct Cost {
18946 /// Are we optimizing for code size.
18947 bool ForCodeSize = false;
18948
18949 /// Various costs.
18950 unsigned Loads = 0;
18951 unsigned Truncates = 0;
18952 unsigned CrossRegisterBanksCopies = 0;
18953 unsigned ZExts = 0;
18954 unsigned Shift = 0;
18955
18956 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18957
18958 /// Get the cost of one isolated slice.
18959 Cost(const LoadedSlice &LS, bool ForCodeSize)
18960 : ForCodeSize(ForCodeSize), Loads(1) {
18961 EVT TruncType = LS.Inst->getValueType(0);
18962 EVT LoadedType = LS.getLoadedType();
18963 if (TruncType != LoadedType &&
18964 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18965 ZExts = 1;
18966 }
18967
18968 /// Account for slicing gain in the current cost.
18969 /// Slicing provides a few gains, like removing a shift or a
18970 /// truncate. This method grows the cost of the original
18971 /// load with the gain from this slice.
18972 void addSliceGain(const LoadedSlice &LS) {
18973 // Each slice saves a truncate.
18974 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18975 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18976 ++Truncates;
18977 // If there is a shift amount, this slice gets rid of it.
18978 if (LS.Shift)
18979 ++Shift;
18980 // If this slice can merge a cross register bank copy, account for it.
18981 if (LS.canMergeExpensiveCrossRegisterBankCopy())
18982 ++CrossRegisterBanksCopies;
18983 }
18984
18985 Cost &operator+=(const Cost &RHS) {
18986 Loads += RHS.Loads;
18987 Truncates += RHS.Truncates;
18988 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18989 ZExts += RHS.ZExts;
18990 Shift += RHS.Shift;
18991 return *this;
18992 }
18993
18994 bool operator==(const Cost &RHS) const {
18995 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18996 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18997 ZExts == RHS.ZExts && Shift == RHS.Shift;
18998 }
18999
19000 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19001
19002 bool operator<(const Cost &RHS) const {
19003 // Assume cross register banks copies are as expensive as loads.
19004 // FIXME: Do we want some more target hooks?
19005 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19006 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19007 // Unless we are optimizing for code size, consider the
19008 // expensive operation first.
19009 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19010 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19011 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19012 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19013 }
19014
19015 bool operator>(const Cost &RHS) const { return RHS < *this; }
19016
19017 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19018
19019 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19020 };
19021
19022 // The last instruction that represents the slice. This should be a
19023 // truncate instruction.
19024 SDNode *Inst;
19025
19026 // The original load instruction.
19027 LoadSDNode *Origin;
19028
19029 // The right shift amount in bits from the original load.
19030 unsigned Shift;
19031
19032 // The DAG from which Origin came.
19033 // This is used to get some contextual information about legal types, etc.
19034 SelectionDAG *DAG;
19035
19036 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19037 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19038 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19039
19040 /// Get the bits used in a chunk of bits \p BitWidth large.
19041 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
19042 /// unused bits set to 0.
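  /// For example (illustrative values): with a 64-bit Origin, a 16-bit
  /// truncate as Inst and Shift == 16, the result is a 64-bit APInt with
  /// bits [16, 31] set and all other bits clear.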
19043 APInt getUsedBits() const {
19044 // Reproduce the trunc(lshr) sequence:
19045 // - Start from the truncated value.
19046 // - Zero extend to the desired bit width.
19047 // - Shift left.
19048 assert(Origin && "No original load to compare against.");
19049 unsigned BitWidth = Origin->getValueSizeInBits(0);
19050 assert(Inst && "This slice is not bound to an instruction");
19051 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19052 "Extracted slice is bigger than the whole type!");
19053 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19054 UsedBits.setAllBits();
19055 UsedBits = UsedBits.zext(BitWidth);
19056 UsedBits <<= Shift;
19057 return UsedBits;
19058 }
19059
19060 /// Get the size of the slice to be loaded in bytes.
19061 unsigned getLoadedSize() const {
19062 unsigned SliceSize = getUsedBits().popcount();
19063 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19064 return SliceSize / 8;
19065 }
19066
19067 /// Get the type that will be loaded for this slice.
19068 /// Note: This may not be the final type for the slice.
19069 EVT getLoadedType() const {
19070 assert(DAG && "Missing context");
19071 LLVMContext &Ctxt = *DAG->getContext();
19072 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19073 }
19074
19075 /// Get the alignment of the load used for this slice.
19076 Align getAlign() const {
19077 Align Alignment = Origin->getAlign();
19078 uint64_t Offset = getOffsetFromBase();
19079 if (Offset != 0)
19080 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19081 return Alignment;
19082 }
19083
19084 /// Check if this slice can be rewritten with legal operations.
19085 bool isLegal() const {
19086 // An invalid slice is not legal.
19087 if (!Origin || !Inst || !DAG)
19088 return false;
19089
19090 // Offsets are for indexed loads only; we do not handle that.
19091 if (!Origin->getOffset().isUndef())
19092 return false;
19093
19094 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19095
19096 // Check that the type is legal.
19097 EVT SliceType = getLoadedType();
19098 if (!TLI.isTypeLegal(SliceType))
19099 return false;
19100
19101 // Check that the load is legal for this type.
19102 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19103 return false;
19104
19105 // Check that the offset can be computed.
19106 // 1. Check its type.
19107 EVT PtrType = Origin->getBasePtr().getValueType();
19108 if (PtrType == MVT::Untyped || PtrType.isExtended())
19109 return false;
19110
19111 // 2. Check that it fits in the immediate.
19112 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19113 return false;
19114
19115 // 3. Check that the computation is legal.
19116 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19117 return false;
19118
19119 // Check that the zext is legal if it needs one.
19120 EVT TruncateType = Inst->getValueType(0);
19121 if (TruncateType != SliceType &&
19122 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19123 return false;
19124
19125 return true;
19126 }
19127
19128 /// Get the offset in bytes of this slice in the original chunk of
19129 /// bits.
19130 /// \pre DAG != nullptr.
19131 uint64_t getOffsetFromBase() const {
19132 assert(DAG && "Missing context.");
19133 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19134 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19135 uint64_t Offset = Shift / 8;
19136 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19137 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19138 "The size of the original loaded type is not a multiple of a"
19139 " byte.");
19140 // If Offset is bigger than TySizeInBytes, it means we are loading all
19141 // zeros. This should have been optimized before in the process.
19142 assert(TySizeInBytes > Offset &&
19143 "Invalid shift amount for given loaded size");
19144 if (IsBigEndian)
19145 Offset = TySizeInBytes - Offset - getLoadedSize();
19146 return Offset;
19147 }
19148
19149 /// Generate the sequence of instructions to load the slice
19150 /// represented by this object and redirect the uses of this slice to
19151 /// this new sequence of instructions.
19152 /// \pre this->Inst && this->Origin are valid Instructions and this
19153 /// object passed the legal check: LoadedSlice::isLegal returned true.
19154 /// \return The last instruction of the sequence used to load the slice.
19155 SDValue loadSlice() const {
19156 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19157 const SDValue &OldBaseAddr = Origin->getBasePtr();
19158 SDValue BaseAddr = OldBaseAddr;
19159 // Get the offset in that chunk of bytes w.r.t. the endianness.
19160 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19161 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19162 if (Offset) {
19163 // BaseAddr = BaseAddr + Offset.
19164 EVT ArithType = BaseAddr.getValueType();
19165 SDLoc DL(Origin);
19166 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19167 DAG->getConstant(Offset, DL, ArithType));
19168 }
19169
19170 // Create the type of the loaded slice according to its size.
19171 EVT SliceType = getLoadedType();
19172
19173 // Create the load for the slice.
19174 SDValue LastInst =
19175 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19176 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19177 Origin->getMemOperand()->getFlags());
19178 // If the final type is not the same as the loaded type, this means that
19179 // we have to pad with zero. Create a zero extend for that.
19180 EVT FinalType = Inst->getValueType(0);
19181 if (SliceType != FinalType)
19182 LastInst =
19183 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19184 return LastInst;
19185 }
19186
19187 /// Check if this slice can be merged with an expensive cross register
19188 /// bank copy. E.g.,
19189 /// i = load i32
19190 /// f = bitcast i32 i to float
19191 bool canMergeExpensiveCrossRegisterBankCopy() const {
19192 if (!Inst || !Inst->hasOneUse())
19193 return false;
19194 SDNode *Use = *Inst->use_begin();
19195 if (Use->getOpcode() != ISD::BITCAST)
19196 return false;
19197 assert(DAG && "Missing context");
19198 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19199 EVT ResVT = Use->getValueType(0);
19200 const TargetRegisterClass *ResRC =
19201 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19202 const TargetRegisterClass *ArgRC =
19203 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19204 Use->getOperand(0)->isDivergent());
19205 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19206 return false;
19207
19208 // At this point, we know that we perform a cross-register-bank copy.
19209 // Check if it is expensive.
19210 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19211 // Assume bitcasts are cheap, unless both register classes do not
19212 // explicitly share a common sub class.
19213 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19214 return false;
19215
19216 // Check if it will be merged with the load.
19217 // 1. Check the alignment / fast memory access constraint.
19218 unsigned IsFast = 0;
19219 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19220 Origin->getAddressSpace(), getAlign(),
19221 Origin->getMemOperand()->getFlags(), &IsFast) ||
19222 !IsFast)
19223 return false;
19224
19225 // 2. Check that the load is a legal operation for that type.
19226 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19227 return false;
19228
19229 // 3. Check that we do not have a zext in the way.
19230 if (Inst->getValueType(0) != getLoadedType())
19231 return false;
19232
19233 return true;
19234 }
19235};
19236
19237} // end anonymous namespace
19238
19239/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19240/// \p UsedBits looks like 0..0 1..1 0..0.
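/// For example (illustrative 32-bit values), 0x0000ff00 is dense, while
/// 0x00ff00ff is not.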
19241static bool areUsedBitsDense(const APInt &UsedBits) {
19242 // If all the bits are one, this is dense!
19243 if (UsedBits.isAllOnes())
19244 return true;
19245
19246 // Get rid of the unused bits on the right.
19247 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19248 // Get rid of the unused bits on the left.
19249 if (NarrowedUsedBits.countl_zero())
19250 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19251 // Check that the chunk of bits is completely used.
19252 return NarrowedUsedBits.isAllOnes();
19253}
19254
19255/// Check whether or not \p First and \p Second are next to each other
19256/// in memory. This means that there is no hole between the bits loaded
19257/// by \p First and the bits loaded by \p Second.
19258static bool areSlicesNextToEachOther(const LoadedSlice &First,
19259 const LoadedSlice &Second) {
19260 assert(First.Origin == Second.Origin && First.Origin &&
19261 "Unable to match different memory origins.");
19262 APInt UsedBits = First.getUsedBits();
19263 assert((UsedBits & Second.getUsedBits()) == 0 &&
19264 "Slices are not supposed to overlap.");
19265 UsedBits |= Second.getUsedBits();
19266 return areUsedBitsDense(UsedBits);
19267}
19268
19269/// Adjust the \p GlobalLSCost according to the target
19270/// pairing capabilities and the layout of the slices.
19271/// \pre \p GlobalLSCost should account for at least as many loads as
19272/// there are in the slices in \p LoadedSlices.
19273static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19274 LoadedSlice::Cost &GlobalLSCost) {
19275 unsigned NumberOfSlices = LoadedSlices.size();
19276 // If there are fewer than 2 elements, no pairing is possible.
19277 if (NumberOfSlices < 2)
19278 return;
19279
19280 // Sort the slices so that elements that are likely to be next to each
19281 // other in memory are next to each other in the list.
19282 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19283 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19284 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19285 });
19286 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19287 // First (resp. Second) is the first (resp. second) potential candidate
19288 // to be placed in a paired load.
19289 const LoadedSlice *First = nullptr;
19290 const LoadedSlice *Second = nullptr;
19291 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19292 // Set the beginning of the pair.
19293 First = Second) {
19294 Second = &LoadedSlices[CurrSlice];
19295
19296 // If First is NULL, it means we start a new pair.
19297 // Get to the next slice.
19298 if (!First)
19299 continue;
19300
19301 EVT LoadedType = First->getLoadedType();
19302
19303 // If the types of the slices are different, we cannot pair them.
19304 if (LoadedType != Second->getLoadedType())
19305 continue;
19306
19307 // Check if the target supplies paired loads for this type.
19308 Align RequiredAlignment;
19309 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19310 // Move to the next pair; this type is hopeless.
19311 Second = nullptr;
19312 continue;
19313 }
19314 // Check if we meet the alignment requirement.
19315 if (First->getAlign() < RequiredAlignment)
19316 continue;
19317
19318 // Check that both loads are next to each other in memory.
19319 if (!areSlicesNextToEachOther(*First, *Second))
19320 continue;
19321
19322 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19323 --GlobalLSCost.Loads;
19324 // Move to the next pair.
19325 Second = nullptr;
19326 }
19327}
19328
19329/// Check the profitability of all involved LoadedSlice.
19330/// Currently, it is considered profitable if there is exactly two
19331/// involved slices (1) which are (2) next to each other in memory, and
19332/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19333///
19334/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19335/// the elements themselves.
19336///
19337/// FIXME: When the cost model is mature enough, we can relax
19338/// constraints (1) and (2).
19339static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19340 const APInt &UsedBits, bool ForCodeSize) {
19341 unsigned NumberOfSlices = LoadedSlices.size();
19342 if (StressLoadSlicing)
19343 return NumberOfSlices > 1;
19344
19345 // Check (1).
19346 if (NumberOfSlices != 2)
19347 return false;
19348
19349 // Check (2).
19350 if (!areUsedBitsDense(UsedBits))
19351 return false;
19352
19353 // Check (3).
19354 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19355 // The original code has one big load.
19356 OrigCost.Loads = 1;
19357 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19358 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19359 // Accumulate the cost of all the slices.
19360 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19361 GlobalSlicingCost += SliceCost;
19362
19363 // Account as cost in the original configuration the gain obtained
19364 // with the current slices.
19365 OrigCost.addSliceGain(LS);
19366 }
19367
19368 // If the target supports paired load, adjust the cost accordingly.
19369 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19370 return OrigCost > GlobalSlicingCost;
19371}
19372
19373/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19374/// operations, split it into the various pieces being extracted.
19375///
19376/// This sort of thing is introduced by SROA.
19377/// This slicing takes care not to insert overlapping loads.
19378/// \pre LI is a simple load (i.e., not an atomic or volatile load).
19379bool DAGCombiner::SliceUpLoad(SDNode *N) {
19380 if (Level < AfterLegalizeDAG)
19381 return false;
19382
19383 LoadSDNode *LD = cast<LoadSDNode>(N);
19384 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19385 !LD->getValueType(0).isInteger())
19386 return false;
19387
19388 // The algorithm to split up a load of a scalable vector into individual
19389 // elements currently requires knowing the length of the loaded type,
19390 // so will need adjusting to work on scalable vectors.
19391 if (LD->getValueType(0).isScalableVector())
19392 return false;
19393
19394 // Keep track of already used bits to detect overlapping values.
19395 // In that case, we will just abort the transformation.
19396 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19397
19398 SmallVector<LoadedSlice, 4> LoadedSlices;
19399
19400 // Check if this load is used as several smaller chunks of bits.
19401 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19402 // of computation for each trunc.
19403 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19404 UI != UIEnd; ++UI) {
19405 // Skip the uses of the chain.
19406 if (UI.getUse().getResNo() != 0)
19407 continue;
19408
19409 SDNode *User = *UI;
19410 unsigned Shift = 0;
19411
19412 // Check if this is a trunc(lshr).
19413 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19414 isa<ConstantSDNode>(User->getOperand(1))) {
19415 Shift = User->getConstantOperandVal(1);
19416 User = *User->use_begin();
19417 }
19418
19419 // At this point, User is expected to be a TRUNCATE, whether we looked
19420 // through a plain trunc or a trunc(lshr).
19421 if (User->getOpcode() != ISD::TRUNCATE)
19422 return false;
19423
19424 // The width of the type must be a power of 2 and at least 8 bits.
19425 // Otherwise the load cannot be represented in LLVM IR.
19426 // Moreover, if we shifted with a non-8-bits multiple, the slice
19427 // will be across several bytes. We do not support that.
19428 unsigned Width = User->getValueSizeInBits(0);
19429 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19430 return false;
19431
19432 // Build the slice for this chain of computations.
19433 LoadedSlice LS(User, LD, Shift, &DAG);
19434 APInt CurrentUsedBits = LS.getUsedBits();
19435
19436 // Check if this slice overlaps with another.
19437 if ((CurrentUsedBits & UsedBits) != 0)
19438 return false;
19439 // Update the bits used globally.
19440 UsedBits |= CurrentUsedBits;
19441
19442 // Check if the new slice would be legal.
19443 if (!LS.isLegal())
19444 return false;
19445
19446 // Record the slice.
19447 LoadedSlices.push_back(LS);
19448 }
19449
19450 // Abort slicing if it does not seem to be profitable.
19451 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19452 return false;
19453
19454 ++SlicedLoads;
19455
19456 // Rewrite each chain to use an independent load.
19457 // By construction, each chain can be represented by a unique load.
19458
19459 // Prepare the argument for the new token factor for all the slices.
19460 SmallVector<SDValue, 8> ArgChains;
19461 for (const LoadedSlice &LS : LoadedSlices) {
19462 SDValue SliceInst = LS.loadSlice();
19463 CombineTo(LS.Inst, SliceInst, true);
19464 if (SliceInst.getOpcode() != ISD::LOAD)
19465 SliceInst = SliceInst.getOperand(0);
19466 assert(SliceInst->getOpcode() == ISD::LOAD &&
19467 "It takes more than a zext to get to the loaded slice!!");
19468 ArgChains.push_back(SliceInst.getValue(1));
19469 }
19470
19471 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19472 ArgChains);
19473 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19474 AddToWorklist(Chain.getNode());
19475 return true;
19476}
19477
19478/// Check to see if V is (and load (ptr), imm), where the load is having
19479/// specific bytes cleared out. If so, return the byte size being masked out
19480/// and the shift amount.
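/// For instance (illustrative values, and assuming the chain and pointer
/// checks below also pass): V = (and (load i32 Ptr), 0xFFFF00FF) clears byte 1
/// of the loaded value, so the result would be {1, 1}: one byte masked out, at
/// a shift of one byte.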
19481static std::pair<unsigned, unsigned>
19482CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19483 std::pair<unsigned, unsigned> Result(0, 0);
19484
19485 // Check for the structure we're looking for.
19486 if (V->getOpcode() != ISD::AND ||
19487 !isa<ConstantSDNode>(V->getOperand(1)) ||
19488 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19489 return Result;
19490
19491 // Check the chain and pointer.
19492 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19493 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19494
19495 // This only handles simple types.
19496 if (V.getValueType() != MVT::i16 &&
19497 V.getValueType() != MVT::i32 &&
19498 V.getValueType() != MVT::i64)
19499 return Result;
19500
19501 // Check the constant mask. Invert it so that the bits being masked out are
19502 // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
19503 // follow the sign bit for uniformity.
19504 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19505 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19506 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19507 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19508 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19509 if (NotMaskLZ == 64) return Result; // All zero mask.
19510
19511 // See if we have a continuous run of bits. If so, we have 0*1+0*
19512 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19513 return Result;
19514
19515 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19516 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19517 NotMaskLZ -= 64-V.getValueSizeInBits();
19518
19519 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19520 switch (MaskedBytes) {
19521 case 1:
19522 case 2:
19523 case 4: break;
19524 default: return Result; // All one mask, or 5-byte mask.
19525 }
19526
19527 // Verify that the first bit starts at a multiple of mask so that the access
19528 // is aligned the same as the access width.
19529 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19530
19531 // For narrowing to be valid, it must be the case that the load is the
19532 // memory operation immediately preceding the store.
19533 if (LD == Chain.getNode())
19534 ; // ok.
19535 else if (Chain->getOpcode() == ISD::TokenFactor &&
19536 SDValue(LD, 1).hasOneUse()) {
19537 // LD has only 1 chain use so there are no indirect dependencies.
19538 if (!LD->isOperandOf(Chain.getNode()))
19539 return Result;
19540 } else
19541 return Result; // Fail.
19542
19543 Result.first = MaskedBytes;
19544 Result.second = NotMaskTZ/8;
19545 return Result;
19546}
19547
19548/// Check to see if IVal is something that provides a value as specified by
19549/// MaskInfo. If so, replace the specified store with a narrower store of
19550/// truncated IVal.
19551static SDValue
19552ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19553 SDValue IVal, StoreSDNode *St,
19554 DAGCombiner *DC) {
19555 unsigned NumBytes = MaskInfo.first;
19556 unsigned ByteShift = MaskInfo.second;
19557 SelectionDAG &DAG = DC->getDAG();
19558
19559 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19560 // that uses this. If not, this is not a replacement.
19561 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19562 ByteShift*8, (ByteShift+NumBytes)*8);
19563 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19564
19565 // Check that it is legal on the target to do this. It is legal if the new
19566 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19567 // legalization. If the source type is legal, but the store type isn't, see
19568 // if we can use a truncating store.
19569 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19570 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19571 bool UseTruncStore;
19572 if (DC->isTypeLegal(VT))
19573 UseTruncStore = false;
19574 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19575 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19576 UseTruncStore = true;
19577 else
19578 return SDValue();
19579
19580 // Can't do this for indexed stores.
19581 if (St->isIndexed())
19582 return SDValue();
19583
19584 // Check that the target doesn't think this is a bad idea.
19585 if (St->getMemOperand() &&
19586 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19587 *St->getMemOperand()))
19588 return SDValue();
19589
19590 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19591 // shifted by ByteShift and truncated down to NumBytes.
19592 if (ByteShift) {
19593 SDLoc DL(IVal);
19594 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19595 DAG.getConstant(ByteShift*8, DL,
19596 DC->getShiftAmountTy(IVal.getValueType())));
19597 }
19598
19599 // Figure out the offset for the store and the alignment of the access.
19600 unsigned StOffset;
19601 if (DAG.getDataLayout().isLittleEndian())
19602 StOffset = ByteShift;
19603 else
19604 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19605
19606 SDValue Ptr = St->getBasePtr();
19607 if (StOffset) {
19608 SDLoc DL(IVal);
19609 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19610 }
19611
19612 ++OpsNarrowed;
19613 if (UseTruncStore)
19614 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19615 St->getPointerInfo().getWithOffset(StOffset),
19616 VT, St->getOriginalAlign());
19617
19618 // Truncate down to the new size.
19619 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19620
19621 return DAG
19622 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19623 St->getPointerInfo().getWithOffset(StOffset),
19624 St->getOriginalAlign());
19625}
19626
19627/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19628/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19629/// narrowing the load and store if it would end up being a win for performance
19630/// or code size.
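/// For example (illustrative, and subject to the legality and profitability
/// checks below): "x = load i32 p; y = or x, 0xFF00; store y, p" can be
/// narrowed to an i8 load / or 0xFF / store at p+1 on a little-endian target.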
19631SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19632 StoreSDNode *ST = cast<StoreSDNode>(N);
19633 if (!ST->isSimple())
19634 return SDValue();
19635
19636 SDValue Chain = ST->getChain();
19637 SDValue Value = ST->getValue();
19638 SDValue Ptr = ST->getBasePtr();
19639 EVT VT = Value.getValueType();
19640
19641 if (ST->isTruncatingStore() || VT.isVector())
19642 return SDValue();
19643
19644 unsigned Opc = Value.getOpcode();
19645
19646 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19647 !Value.hasOneUse())
19648 return SDValue();
19649
19650 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19651 // is a byte mask indicating a consecutive number of bytes, check to see if
19652 // Y is known to provide just those bytes. If so, we try to replace the
19653 // load + replace + store sequence with a single (narrower) store, which makes
19654 // the load dead.
19655 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19656 std::pair<unsigned, unsigned> MaskedLoad;
19657 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19658 if (MaskedLoad.first)
19659 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19660 Value.getOperand(1), ST,this))
19661 return NewST;
19662
19663 // Or is commutative, so try swapping X and Y.
19664 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19665 if (MaskedLoad.first)
19666 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19667 Value.getOperand(0), ST,this))
19668 return NewST;
19669 }
19670
19671 if (!EnableReduceLoadOpStoreWidth)
19672 return SDValue();
19673
19674 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19675 return SDValue();
19676
19677 SDValue N0 = Value.getOperand(0);
19678 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19679 Chain == SDValue(N0.getNode(), 1)) {
19680 LoadSDNode *LD = cast<LoadSDNode>(N0);
19681 if (LD->getBasePtr() != Ptr ||
19682 LD->getPointerInfo().getAddrSpace() !=
19683 ST->getPointerInfo().getAddrSpace())
19684 return SDValue();
19685
19686 // Find the type to narrow the load / op / store to.
19687 SDValue N1 = Value.getOperand(1);
19688 unsigned BitWidth = N1.getValueSizeInBits();
19689 APInt Imm = N1->getAsAPIntVal();
19690 if (Opc == ISD::AND)
19691 Imm ^= APInt::getAllOnes(BitWidth);
19692 if (Imm == 0 || Imm.isAllOnes())
19693 return SDValue();
19694 unsigned ShAmt = Imm.countr_zero();
19695 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19696 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19697 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19698 // The narrowing should be profitable, the load/store operation should be
19699 // legal (or custom) and the store size should be equal to the NewVT width.
19700 while (NewBW < BitWidth &&
19701 (NewVT.getStoreSizeInBits() != NewBW ||
19702 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19703 !TLI.isNarrowingProfitable(VT, NewVT))) {
19704 NewBW = NextPowerOf2(NewBW);
19705 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19706 }
19707 if (NewBW >= BitWidth)
19708 return SDValue();
19709
19710 // If the lowest changed bit does not start at a NewBW-bit boundary,
19711 // start at the previous boundary.
19712 if (ShAmt % NewBW)
19713 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19714 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19715 std::min(BitWidth, ShAmt + NewBW));
19716 if ((Imm & Mask) == Imm) {
19717 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19718 if (Opc == ISD::AND)
19719 NewImm ^= APInt::getAllOnes(NewBW);
19720 uint64_t PtrOff = ShAmt / 8;
19721 // For big endian targets, we need to adjust the offset to the pointer to
19722 // load the correct bytes.
19723 if (DAG.getDataLayout().isBigEndian())
19724 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19725
19726 unsigned IsFast = 0;
19727 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19728 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19729 LD->getAddressSpace(), NewAlign,
19730 LD->getMemOperand()->getFlags(), &IsFast) ||
19731 !IsFast)
19732 return SDValue();
19733
19734 SDValue NewPtr =
19735 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19736 SDValue NewLD =
19737 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19738 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19739 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19740 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19741 DAG.getConstant(NewImm, SDLoc(Value),
19742 NewVT));
19743 SDValue NewST =
19744 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19745 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19746
19747 AddToWorklist(NewPtr.getNode());
19748 AddToWorklist(NewLD.getNode());
19749 AddToWorklist(NewVal.getNode());
19750 WorklistRemover DeadNodes(*this);
19751 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19752 ++OpsNarrowed;
19753 return NewST;
19754 }
19755 }
19756
19757 return SDValue();
19758}
19759
19760/// For a given floating point load / store pair, if the load value isn't used
19761/// by any other operations, then consider transforming the pair to integer
19762/// load / store operations if the target deems the transformation profitable.
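// For illustration only: a pair such as
//   %f = load float, ptr %p ; store float %f, ptr %q
// where %f has no other users can be turned into an i32 load/store pair,
// avoiding a floating-point register round-trip, if the target reports the
// integer accesses as legal and fast.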
19763SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19764 StoreSDNode *ST = cast<StoreSDNode>(N);
19765 SDValue Value = ST->getValue();
19766 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19767 Value.hasOneUse()) {
19768 LoadSDNode *LD = cast<LoadSDNode>(Value);
19769 EVT VT = LD->getMemoryVT();
19770 if (!VT.isFloatingPoint() ||
19771 VT != ST->getMemoryVT() ||
19772 LD->isNonTemporal() ||
19773 ST->isNonTemporal() ||
19774 LD->getPointerInfo().getAddrSpace() != 0 ||
19775 ST->getPointerInfo().getAddrSpace() != 0)
19776 return SDValue();
19777
19778 TypeSize VTSize = VT.getSizeInBits();
19779
19780 // We don't know the size of scalable types at compile time so we cannot
19781 // create an integer of the equivalent size.
19782 if (VTSize.isScalable())
19783 return SDValue();
19784
19785 unsigned FastLD = 0, FastST = 0;
19786 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19787 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19788 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19789 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19790 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19791 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19792 *LD->getMemOperand(), &FastLD) ||
19793 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19794 *ST->getMemOperand(), &FastST) ||
19795 !FastLD || !FastST)
19796 return SDValue();
19797
19798 SDValue NewLD =
19799 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19800 LD->getPointerInfo(), LD->getAlign());
19801
19802 SDValue NewST =
19803 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19804 ST->getPointerInfo(), ST->getAlign());
19805
19806 AddToWorklist(NewLD.getNode());
19807 AddToWorklist(NewST.getNode());
19808 WorklistRemover DeadNodes(*this);
19809 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19810 ++LdStFP2Int;
19811 return NewST;
19812 }
19813
19814 return SDValue();
19815}
19816
19817// This is a helper function for visitMUL to check the profitability
19818// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19819// MulNode is the original multiply, AddNode is (add x, c1),
19820// and ConstNode is c2.
19821//
19822// If the (add x, c1) has multiple uses, we could increase
19823// the number of adds if we make this transformation.
19824// It would only be worth doing this if we can remove a
19825// multiply in the process. Check for that here.
19826// To illustrate:
19827// (A + c1) * c3
19828// (A + c2) * c3
19829// We're checking for cases where we have common "c3 * A" expressions.
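// Concrete illustration: with both (A + 1) * 8 and (A + 3) * 8 present,
// rewriting each as (A * 8) + c keeps a single shared multiply (A * 8),
// so the fold pays for the duplicated adds.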
19830bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19831 SDValue ConstNode) {
19832 APInt Val;
19833
19834 // If the add only has one use, and the target thinks the folding is
19835 // profitable or does not lead to worse code, this would be OK to do.
19836 if (AddNode->hasOneUse() &&
19837 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19838 return true;
19839
19840 // Walk all the users of the constant with which we're multiplying.
19841 for (SDNode *Use : ConstNode->uses()) {
19842 if (Use == MulNode) // This use is the one we're on right now. Skip it.
19843 continue;
19844
19845 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19846 SDNode *OtherOp;
19847 SDNode *MulVar = AddNode.getOperand(0).getNode();
19848
19849 // OtherOp is what we're multiplying against the constant.
19850 if (Use->getOperand(0) == ConstNode)
19851 OtherOp = Use->getOperand(1).getNode();
19852 else
19853 OtherOp = Use->getOperand(0).getNode();
19854
19855 // Check to see if multiply is with the same operand of our "add".
19856 //
19857 // ConstNode = CONST
19858 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
19859 // ...
19860 // AddNode = (A + c1) <-- MulVar is A.
19861 // = AddNode * ConstNode <-- current visiting instruction.
19862 //
19863 // If we make this transformation, we will have a common
19864 // multiply (ConstNode * A) that we can save.
19865 if (OtherOp == MulVar)
19866 return true;
19867
19868 // Now check to see if a future expansion will give us a common
19869 // multiply.
19870 //
19871 // ConstNode = CONST
19872 // AddNode = (A + c1)
19873 // ... = AddNode * ConstNode <-- current visiting instruction.
19874 // ...
19875 // OtherOp = (A + c2)
19876 // Use = OtherOp * ConstNode <-- visiting Use.
19877 //
19878 // If we make this transformation, we will have a common
19879 // multiply (CONST * A) after we also apply the same transformation
19880 // to the other multiply use ('Use' above).
19881 if (OtherOp->getOpcode() == ISD::ADD &&
19882 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19883 OtherOp->getOperand(0).getNode() == MulVar)
19884 return true;
19885 }
19886 }
19887
19888 // Didn't find a case where this would be profitable.
19889 return false;
19890}
19891
19892SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19893 unsigned NumStores) {
19894 SmallVector<SDValue, 8> Chains;
19895 SmallPtrSet<const SDNode *, 8> Visited;
19896 SDLoc StoreDL(StoreNodes[0].MemNode);
19897
19898 for (unsigned i = 0; i < NumStores; ++i) {
19899 Visited.insert(StoreNodes[i].MemNode);
19900 }
19901
19902 // don't include nodes that are children or repeated nodes.
19903 for (unsigned i = 0; i < NumStores; ++i) {
19904 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19905 Chains.push_back(StoreNodes[i].MemNode->getChain());
19906 }
19907
19908 assert(!Chains.empty() && "Chain should have generated a chain");
19909 return DAG.getTokenFactor(StoreDL, Chains);
19910}
19911
19912bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19913 const Value *UnderlyingObj = nullptr;
19914 for (const auto &MemOp : StoreNodes) {
19915 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
19916 // A pseudo value such as a fixed stack object has its own frame index and
19917 // size; do not reuse the first store's frame index for other frames.
19918 if (MMO->getPseudoValue())
19919 return false;
19920
19921 if (!MMO->getValue())
19922 return false;
19923
19924 const Value *Obj = getUnderlyingObject(MMO->getValue());
19925
19926 if (UnderlyingObj && UnderlyingObj != Obj)
19927 return false;
19928
19929 if (!UnderlyingObj)
19930 UnderlyingObj = Obj;
19931 }
19932
19933 return true;
19934}
19935
19936bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19937 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19938 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19939 // Make sure we have something to merge.
19940 if (NumStores < 2)
19941 return false;
19942
19943 assert((!UseTrunc || !UseVector) &&
19944 "This optimization cannot emit a vector truncating store");
19945
19946 // The latest Node in the DAG.
19947 SDLoc DL(StoreNodes[0].MemNode);
19948
19949 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19950 unsigned SizeInBits = NumStores * ElementSizeBits;
19951 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19952
19953 std::optional<MachineMemOperand::Flags> Flags;
19954 AAMDNodes AAInfo;
19955 for (unsigned I = 0; I != NumStores; ++I) {
19956 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19957 if (!Flags) {
19958 Flags = St->getMemOperand()->getFlags();
19959 AAInfo = St->getAAInfo();
19960 continue;
19961 }
19962 // Skip merging if there's an inconsistent flag.
19963 if (Flags != St->getMemOperand()->getFlags())
19964 return false;
19965 // Concatenate AA metadata.
19966 AAInfo = AAInfo.concat(St->getAAInfo());
19967 }
19968
19969 EVT StoreTy;
19970 if (UseVector) {
19971 unsigned Elts = NumStores * NumMemElts;
19972 // Get the type for the merged vector store.
19973 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19974 } else
19975 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19976
19977 SDValue StoredVal;
19978 if (UseVector) {
19979 if (IsConstantSrc) {
19980 SmallVector<SDValue, 8> BuildVector;
19981 for (unsigned I = 0; I != NumStores; ++I) {
19982 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19983 SDValue Val = St->getValue();
19984 // If constant is of the wrong type, convert it now. This comes up
19985 // when one of our stores was truncating.
19986 if (MemVT != Val.getValueType()) {
19987 Val = peekThroughBitcasts(Val);
19988 // Deal with constants of wrong size.
19989 if (ElementSizeBits != Val.getValueSizeInBits()) {
19990 auto *C = dyn_cast<ConstantSDNode>(Val);
19991 if (!C)
19992 // Not clear how to truncate FP values.
19993 // TODO: Handle truncation of build_vector constants
19994 return false;
19995
19996 EVT IntMemVT =
19997 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19998 Val = DAG.getConstant(C->getAPIntValue()
19999 .zextOrTrunc(Val.getValueSizeInBits())
20000 .zextOrTrunc(ElementSizeBits),
20001 SDLoc(C), IntMemVT);
20002 }
20003 // Make sure the value has the correctly sized memory type.
20004 Val = DAG.getBitcast(MemVT, Val);
20005 }
20006 BuildVector.push_back(Val);
20007 }
20008 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20009 : ISD::BUILD_VECTOR,
20010 DL, StoreTy, BuildVector);
20011 } else {
20012 SmallVector<SDValue, 8> Ops;
20013 for (unsigned i = 0; i < NumStores; ++i) {
20014 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20015 SDValue Val = peekThroughBitcasts(St->getValue());
20016 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20017 // type MemVT. If the underlying value is not the correct
20018 // type, but it is an extraction of an appropriate vector we
20019 // can recast Val to be of the correct type. This may require
20020 // converting between EXTRACT_VECTOR_ELT and
20021 // EXTRACT_SUBVECTOR.
20022 if ((MemVT != Val.getValueType()) &&
20023 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20024 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20025 EVT MemVTScalarTy = MemVT.getScalarType();
20026 // We may need to add a bitcast here to get types to line up.
20027 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20028 Val = DAG.getBitcast(MemVT, Val);
20029 } else if (MemVT.isVector() &&
20030 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20031 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20032 } else {
20033 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20034 : ISD::EXTRACT_VECTOR_ELT;
20035 SDValue Vec = Val.getOperand(0);
20036 SDValue Idx = Val.getOperand(1);
20037 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20038 }
20039 }
20040 Ops.push_back(Val);
20041 }
20042
20043 // Build the extracted vector elements back into a vector.
20044 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20045 : ISD::BUILD_VECTOR,
20046 DL, StoreTy, Ops);
20047 }
20048 } else {
20049 // We should always use a vector store when merging extracted vector
20050 // elements, so this path implies a store of constants.
20051 assert(IsConstantSrc && "Merged vector elements should use vector store");
20052
20053 APInt StoreInt(SizeInBits, 0);
20054
20055 // Construct a single integer constant which is made of the smaller
20056 // constant inputs.
20057 bool IsLE = DAG.getDataLayout().isLittleEndian();
20058 for (unsigned i = 0; i < NumStores; ++i) {
20059 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20060 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20061
20062 SDValue Val = St->getValue();
20063 Val = peekThroughBitcasts(Val);
20064 StoreInt <<= ElementSizeBits;
20065 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20066 StoreInt |= C->getAPIntValue()
20067 .zextOrTrunc(ElementSizeBits)
20068 .zextOrTrunc(SizeInBits);
20069 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20070 StoreInt |= C->getValueAPF()
20071 .bitcastToAPInt()
20072 .zextOrTrunc(ElementSizeBits)
20073 .zextOrTrunc(SizeInBits);
20074 // If fp truncation is necessary give up for now.
20075 if (MemVT.getSizeInBits() != ElementSizeBits)
20076 return false;
20077 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20078 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20079 // Not yet handled
20080 return false;
20081 } else {
20082 llvm_unreachable("Invalid constant element type");
20083 }
20084 }
20085
20086 // Create the new Load and Store operations.
20087 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20088 }
20089
20090 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20091 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20092 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20093
20094 // Make sure we use a truncating store if that is necessary for legality.
20095 // When generating the new widened store, if the first store's pointer info
20096 // cannot be reused, discard everything except the address space, because
20097 // the widened store can no longer be represented by the original pointer
20098 // info, which described only the narrow memory object.
20099 SDValue NewStore;
20100 if (!UseTrunc) {
20101 NewStore = DAG.getStore(
20102 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20103 CanReusePtrInfo
20104 ? FirstInChain->getPointerInfo()
20105 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20106 FirstInChain->getAlign(), *Flags, AAInfo);
20107 } else { // Must be realized as a trunc store
20108 EVT LegalizedStoredValTy =
20109 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20110 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20111 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20112 SDValue ExtendedStoreVal =
20113 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20114 LegalizedStoredValTy);
20115 NewStore = DAG.getTruncStore(
20116 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20117 CanReusePtrInfo
20118 ? FirstInChain->getPointerInfo()
20119 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20120 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20121 AAInfo);
20122 }
20123
20124 // Replace all merged stores with the new store.
20125 for (unsigned i = 0; i < NumStores; ++i)
20126 CombineTo(StoreNodes[i].MemNode, NewStore);
20127
20128 AddToWorklist(NewChain.getNode());
20129 return true;
20130}
20131
20132void DAGCombiner::getStoreMergeCandidates(
20133 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20134 SDNode *&RootNode) {
20135 // This holds the base pointer, index, and the offset in bytes from the base
20136 // pointer. We must have a base and an offset. Do not handle stores to undef
20137 // base pointers.
20138 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20139 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20140 return;
20141
20142 SDValue Val = peekThroughBitcasts(St->getValue());
20143 StoreSource StoreSrc = getStoreSource(Val);
20144 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20145
20146 // Match on loadbaseptr if relevant.
20147 EVT MemVT = St->getMemoryVT();
20148 BaseIndexOffset LBasePtr;
20149 EVT LoadVT;
20150 if (StoreSrc == StoreSource::Load) {
20151 auto *Ld = cast<LoadSDNode>(Val);
20152 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20153 LoadVT = Ld->getMemoryVT();
20154 // Load and store should be the same type.
20155 if (MemVT != LoadVT)
20156 return;
20157 // Loads must only have one use.
20158 if (!Ld->hasNUsesOfValue(1, 0))
20159 return;
20160 // The memory operands must not be volatile/indexed/atomic.
20161 // TODO: May be able to relax for unordered atomics (see D66309)
20162 if (!Ld->isSimple() || Ld->isIndexed())
20163 return;
20164 }
20165 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20166 int64_t &Offset) -> bool {
20167 // The memory operands must not be volatile/indexed/atomic.
20168 // TODO: May be able to relax for unordered atomics (see D66309)
20169 if (!Other->isSimple() || Other->isIndexed())
20170 return false;
20171 // Don't mix temporal stores with non-temporal stores.
20172 if (St->isNonTemporal() != Other->isNonTemporal())
20173 return false;
20174 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20175 return false;
20176 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20177 // Allow merging constants of different types as integers.
20178 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20179 : Other->getMemoryVT() != MemVT;
20180 switch (StoreSrc) {
20181 case StoreSource::Load: {
20182 if (NoTypeMatch)
20183 return false;
20184 // The Load's Base Ptr must also match.
20185 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20186 if (!OtherLd)
20187 return false;
20188 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20189 if (LoadVT != OtherLd->getMemoryVT())
20190 return false;
20191 // Loads must only have one use.
20192 if (!OtherLd->hasNUsesOfValue(1, 0))
20193 return false;
20194 // The memory operands must not be volatile/indexed/atomic.
20195 // TODO: May be able to relax for unordered atomics (see D66309)
20196 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20197 return false;
20198 // Don't mix temporal loads with non-temporal loads.
20199 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20200 return false;
20201 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20202 *OtherLd))
20203 return false;
20204 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20205 return false;
20206 break;
20207 }
20208 case StoreSource::Constant:
20209 if (NoTypeMatch)
20210 return false;
20211 if (getStoreSource(OtherBC) != StoreSource::Constant)
20212 return false;
20213 break;
20214 case StoreSource::Extract:
20215 // Do not merge truncated stores here.
20216 if (Other->isTruncatingStore())
20217 return false;
20218 if (!MemVT.bitsEq(OtherBC.getValueType()))
20219 return false;
20220 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20221 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20222 return false;
20223 break;
20224 default:
20225 llvm_unreachable("Unhandled store source for merging");
20226 }
20227 Ptr = BaseIndexOffset::match(Other, DAG);
20228 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20229 };
20230
20231 // Check if the pair of StoreNode and the RootNode already bail out many
20232 // times which is over the limit in dependence check.
20233 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20234 SDNode *RootNode) -> bool {
20235 auto RootCount = StoreRootCountMap.find(StoreNode);
20236 return RootCount != StoreRootCountMap.end() &&
20237 RootCount->second.first == RootNode &&
20238 RootCount->second.second > StoreMergeDependenceLimit;
20239 };
20240
20241 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20242 // This must be a chain use.
20243 if (UseIter.getOperandNo() != 0)
20244 return;
20245 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20246 BaseIndexOffset Ptr;
20247 int64_t PtrDiff;
20248 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20249 !OverLimitInDependenceCheck(OtherStore, RootNode))
20250 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20251 }
20252 };
20253
20254 // We are looking for a root node which is an ancestor to all mergeable
20255 // stores. We search up through a load, to our root, and then down
20256 // through all children. For instance we will find Store{1,2,3} if
20257 // St is Store1, Store2, or Store3 where the root is not a load,
20258 // which is always true for non-volatile ops. TODO: Expand
20259 // the search to find all valid candidates through multiple layers of loads.
20260 //
20261 // Root
20262 // |-------|-------|
20263 // Load Load Store3
20264 // | |
20265 // Store1 Store2
20266 //
20267 // FIXME: We should be able to climb and
20268 // descend TokenFactors to find candidates as well.
20269
20270 RootNode = St->getChain().getNode();
20271
20272 unsigned NumNodesExplored = 0;
20273 const unsigned MaxSearchNodes = 1024;
20274 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20275 RootNode = Ldn->getChain().getNode();
20276 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20277 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20278 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20279 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20280 TryToAddCandidate(I2);
20281 }
20282 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20283 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20284 TryToAddCandidate(I);
20285 }
20286 }
20287 } else {
20288 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20289 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20290 TryToAddCandidate(I);
20291 }
20292}
20293
20294// We need to check that merging these stores does not cause a loop in the
20295// DAG. Any store candidate may depend on another candidate indirectly through
20296// its operands. Check in parallel by searching up from operands of candidates.
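// Illustrative scenario: merging stores S1 and S2 would form a cycle if S2's
// stored value is computed from a load whose chain depends on S1; the merged
// store node would then be its own predecessor.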
20297bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20298 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20299 SDNode *RootNode) {
20300 // FIXME: We should be able to truncate a full search of
20301 // predecessors by doing a BFS and keeping tabs the originating
20302 // stores from which worklist nodes come from in a similar way to
20303 // TokenFactor simplification.
20304
20305 SmallPtrSet<const SDNode *, 32> Visited;
20306 SmallVector<const SDNode *, 8> Worklist;
20307
20308 // RootNode is a predecessor to all candidates so we need not search
20309 // past it. Add RootNode (peeking through TokenFactors). Do not count
20310 // these towards size check.
20311
20312 Worklist.push_back(RootNode);
20313 while (!Worklist.empty()) {
20314 auto N = Worklist.pop_back_val();
20315 if (!Visited.insert(N).second)
20316 continue; // Already present in Visited.
20317 if (N->getOpcode() == ISD::TokenFactor) {
20318 for (SDValue Op : N->ops())
20319 Worklist.push_back(Op.getNode());
20320 }
20321 }
20322
20323 // Don't count pruning nodes towards max.
20324 unsigned int Max = 1024 + Visited.size();
20325 // Search Ops of store candidates.
20326 for (unsigned i = 0; i < NumStores; ++i) {
20327 SDNode *N = StoreNodes[i].MemNode;
20328 // Of the 4 Store Operands:
20329 // * Chain (Op 0) -> We have already considered these
20330 // in candidate selection, but only by following the
20331 // chain dependencies. We could still have a chain
20332 // dependency to a load, that has a non-chain dep to
20333 // another load, that depends on a store, etc. So it is
20334 // possible to have dependencies that consist of a mix
20335 // of chain and non-chain deps, and we need to include
20336 // chain operands in the analysis here.
20337 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20338 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20339 // but aren't necessarily from the same base node, so
20340 // cycles possible (e.g. via indexed store).
20341 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20342 // non-indexed stores). Not constant on all targets (e.g. ARM)
20343 // and so can participate in a cycle.
20344 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20345 Worklist.push_back(N->getOperand(j).getNode());
20346 }
20347 // Search through DAG. We can stop early if we find a store node.
20348 for (unsigned i = 0; i < NumStores; ++i)
20349 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20350 Max)) {
20351 // If the search bails out, record the StoreNode and RootNode in the
20352 // StoreRootCountMap. If we have seen the pair many times over a limit,
20353 // we won't add the StoreNode into the StoreNodes set again.
20354 if (Visited.size() >= Max) {
20355 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20356 if (RootCount.first == RootNode)
20357 RootCount.second++;
20358 else
20359 RootCount = {RootNode, 1};
20360 }
20361 return false;
20362 }
20363 return true;
20364}
20365
20366unsigned
20367DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20368 int64_t ElementSizeBytes) const {
20369 while (true) {
20370 // Find a store past the width of the first store.
20371 size_t StartIdx = 0;
20372 while ((StartIdx + 1 < StoreNodes.size()) &&
20373 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20374 StoreNodes[StartIdx + 1].OffsetFromBase)
20375 ++StartIdx;
20376
20377 // Bail if we don't have enough candidates to merge.
20378 if (StartIdx + 1 >= StoreNodes.size())
20379 return 0;
20380
20381 // Trim stores that overlapped with the first store.
20382 if (StartIdx)
20383 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20384
20385 // Scan the memory operations on the chain and find the first
20386 // non-consecutive store memory address.
20387 unsigned NumConsecutiveStores = 1;
20388 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20389 // Check that the addresses are consecutive starting from the second
20390 // element in the list of stores.
20391 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20392 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20393 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20394 break;
20395 NumConsecutiveStores = i + 1;
20396 }
20397 if (NumConsecutiveStores > 1)
20398 return NumConsecutiveStores;
20399
20400 // There are no consecutive stores at the start of the list.
20401 // Remove the first store and try again.
20402 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20403 }
20404}
20405
20406bool DAGCombiner::tryStoreMergeOfConstants(
20407 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20408 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20409 LLVMContext &Context = *DAG.getContext();
20410 const DataLayout &DL = DAG.getDataLayout();
20411 int64_t ElementSizeBytes = MemVT.getStoreSize();
20412 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20413 bool MadeChange = false;
20414
20415 // Store the constants into memory as one consecutive store.
20416 while (NumConsecutiveStores >= 2) {
20417 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20418 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20419 Align FirstStoreAlign = FirstInChain->getAlign();
20420 unsigned LastLegalType = 1;
20421 unsigned LastLegalVectorType = 1;
20422 bool LastIntegerTrunc = false;
20423 bool NonZero = false;
20424 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20425 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20426 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20427 SDValue StoredVal = ST->getValue();
20428 bool IsElementZero = false;
20429 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20430 IsElementZero = C->isZero();
20431 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20432 IsElementZero = C->getConstantFPValue()->isNullValue();
20433 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20434 IsElementZero = true;
20435 if (IsElementZero) {
20436 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20437 FirstZeroAfterNonZero = i;
20438 }
20439 NonZero |= !IsElementZero;
20440
20441 // Find a legal type for the constant store.
20442 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20443 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20444 unsigned IsFast = 0;
20445
20446 // Break early when size is too large to be legal.
20447 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20448 break;
20449
20450 if (TLI.isTypeLegal(StoreTy) &&
20451 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20452 DAG.getMachineFunction()) &&
20453 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20454 *FirstInChain->getMemOperand(), &IsFast) &&
20455 IsFast) {
20456 LastIntegerTrunc = false;
20457 LastLegalType = i + 1;
20458 // Or check whether a truncstore is legal.
20459 } else if (TLI.getTypeAction(Context, StoreTy) ==
20460 TargetLowering::TypePromoteInteger) {
20461 EVT LegalizedStoredValTy =
20462 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20463 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20464 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20465 DAG.getMachineFunction()) &&
20466 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20467 *FirstInChain->getMemOperand(), &IsFast) &&
20468 IsFast) {
20469 LastIntegerTrunc = true;
20470 LastLegalType = i + 1;
20471 }
20472 }
20473
20474 // We only use vectors if the target allows it and the function is not
20475 // marked with the noimplicitfloat attribute.
20476 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20477 AllowVectors) {
20478 // Find a legal type for the vector store.
20479 unsigned Elts = (i + 1) * NumMemElts;
20480 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20481 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20482 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20483 TLI.allowsMemoryAccess(Context, DL, Ty,
20484 *FirstInChain->getMemOperand(), &IsFast) &&
20485 IsFast)
20486 LastLegalVectorType = i + 1;
20487 }
20488 }
20489
20490 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20491 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20492 bool UseTrunc = LastIntegerTrunc && !UseVector;
20493
20494 // Check if we found a legal integer type that creates a meaningful
20495 // merge.
20496 if (NumElem < 2) {
20497 // We know that candidate stores are in order and of correct
20498 // shape. While there is no mergeable sequence starting from the
20499 // beginning, one may start later in the sequence. The only
20500 // reason a merge of size N could have failed where another of
20501 // the same size would not have, is if the alignment has
20502 // improved or we've dropped a non-zero value. Drop as many
20503 // candidates as we can here.
20504 unsigned NumSkip = 1;
20505 while ((NumSkip < NumConsecutiveStores) &&
20506 (NumSkip < FirstZeroAfterNonZero) &&
20507 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20508 NumSkip++;
20509
20510 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20511 NumConsecutiveStores -= NumSkip;
20512 continue;
20513 }
20514
20515 // Check that we can merge these candidates without causing a cycle.
20516 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20517 RootNode)) {
20518 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20519 NumConsecutiveStores -= NumElem;
20520 continue;
20521 }
20522
20523 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20524 /*IsConstantSrc*/ true,
20525 UseVector, UseTrunc);
20526
20527 // Remove merged stores for next iteration.
20528 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20529 NumConsecutiveStores -= NumElem;
20530 }
20531 return MadeChange;
20532}
20533
20534bool DAGCombiner::tryStoreMergeOfExtracts(
20535 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20536 EVT MemVT, SDNode *RootNode) {
20537 LLVMContext &Context = *DAG.getContext();
20538 const DataLayout &DL = DAG.getDataLayout();
20539 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20540 bool MadeChange = false;
20541
20542 // Loop on Consecutive Stores on success.
20543 while (NumConsecutiveStores >= 2) {
20544 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20545 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20546 Align FirstStoreAlign = FirstInChain->getAlign();
20547 unsigned NumStoresToMerge = 1;
20548 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20549 // Find a legal type for the vector store.
20550 unsigned Elts = (i + 1) * NumMemElts;
20551 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20552 unsigned IsFast = 0;
20553
20554 // Break early when size is too large to be legal.
20555 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20556 break;
20557
20558 if (TLI.isTypeLegal(Ty) &&
20559 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20560 TLI.allowsMemoryAccess(Context, DL, Ty,
20561 *FirstInChain->getMemOperand(), &IsFast) &&
20562 IsFast)
20563 NumStoresToMerge = i + 1;
20564 }
20565
20566 // Check if we found a legal integer type creating a meaningful
20567 // merge.
20568 if (NumStoresToMerge < 2) {
20569 // We know that candidate stores are in order and of correct
20570 // shape. While there is no mergeable sequence starting from the
20571 // beginning, one may start later in the sequence. The only
20572 // reason a merge of size N could have failed where another of
20573 // the same size would not have, is if the alignment has
20574 // improved. Drop as many candidates as we can here.
20575 unsigned NumSkip = 1;
20576 while ((NumSkip < NumConsecutiveStores) &&
20577 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20578 NumSkip++;
20579
20580 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20581 NumConsecutiveStores -= NumSkip;
20582 continue;
20583 }
20584
20585 // Check that we can merge these candidates without causing a cycle.
20586 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20587 RootNode)) {
20588 StoreNodes.erase(StoreNodes.begin(),
20589 StoreNodes.begin() + NumStoresToMerge);
20590 NumConsecutiveStores -= NumStoresToMerge;
20591 continue;
20592 }
20593
20594 MadeChange |= mergeStoresOfConstantsOrVecElts(
20595 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20596 /*UseVector*/ true, /*UseTrunc*/ false);
20597
20598 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20599 NumConsecutiveStores -= NumStoresToMerge;
20600 }
20601 return MadeChange;
20602}
20603
20604bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20605 unsigned NumConsecutiveStores, EVT MemVT,
20606 SDNode *RootNode, bool AllowVectors,
20607 bool IsNonTemporalStore,
20608 bool IsNonTemporalLoad) {
20609 LLVMContext &Context = *DAG.getContext();
20610 const DataLayout &DL = DAG.getDataLayout();
20611 int64_t ElementSizeBytes = MemVT.getStoreSize();
20612 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20613 bool MadeChange = false;
20614
20615 // Look for load nodes which are used by the stored values.
20616 SmallVector<MemOpLink, 8> LoadNodes;
20617
20618 // Find acceptable loads. Loads need to have the same chain (token factor),
20619 // must not be zext, volatile, indexed, and they must be consecutive.
20620 BaseIndexOffset LdBasePtr;
20621
20622 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20623 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20624 SDValue Val = peekThroughBitcasts(St->getValue());
20625 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20626
20627 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20628 // If this is not the first ptr that we check.
20629 int64_t LdOffset = 0;
20630 if (LdBasePtr.getBase().getNode()) {
20631 // The base ptr must be the same.
20632 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20633 break;
20634 } else {
20635 // Check that all other base pointers are the same as this one.
20636 LdBasePtr = LdPtr;
20637 }
20638
20639 // We found a potential memory operand to merge.
20640 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20641 }
20642
20643 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20644 Align RequiredAlignment;
20645 bool NeedRotate = false;
20646 if (LoadNodes.size() == 2) {
20647 // If we have load/store pair instructions and we only have two values,
20648 // don't bother merging.
20649 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20650 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20651 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20652 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20653 break;
20654 }
20655 // If the loads are reversed, see if we can rotate the halves into place.
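// Illustrative sketch (i32 elements, little-endian): storing (load p+4) to q
// and (load p) to q+4 can become a single i64 load from p, rotated by 32
// bits, then stored to q.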
20656 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20657 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20658 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20659 if (Offset0 - Offset1 == ElementSizeBytes &&
20660 (hasOperation(ISD::ROTL, PairVT) ||
20661 hasOperation(ISD::ROTR, PairVT))) {
20662 std::swap(LoadNodes[0], LoadNodes[1]);
20663 NeedRotate = true;
20664 }
20665 }
20666 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20667 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20668 Align FirstStoreAlign = FirstInChain->getAlign();
20669 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20670
20671 // Scan the memory operations on the chain and find the first
20672 // non-consecutive load memory address. These variables hold the index in
20673 // the store node array.
20674
20675 unsigned LastConsecutiveLoad = 1;
20676
20677 // These variables refer to a size (count), not an index into the array.
20678 unsigned LastLegalVectorType = 1;
20679 unsigned LastLegalIntegerType = 1;
20680 bool isDereferenceable = true;
20681 bool DoIntegerTruncate = false;
20682 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20683 SDValue LoadChain = FirstLoad->getChain();
20684 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20685 // All loads must share the same chain.
20686 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20687 break;
20688
20689 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20690 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20691 break;
20692 LastConsecutiveLoad = i;
20693
20694 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20695 isDereferenceable = false;
20696
20697 // Find a legal type for the vector store.
20698 unsigned Elts = (i + 1) * NumMemElts;
20699 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20700
20701 // Break early when size is too large to be legal.
20702 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20703 break;
20704
20705 unsigned IsFastSt = 0;
20706 unsigned IsFastLd = 0;
20707 // Don't try vector types if we need a rotate. We may still fail the
20708 // legality checks for the integer type, but we can't handle the rotate
20709 // case with vectors.
20710 // FIXME: We could use a shuffle in place of the rotate.
20711 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20712 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20713 DAG.getMachineFunction()) &&
20714 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20715 *FirstInChain->getMemOperand(), &IsFastSt) &&
20716 IsFastSt &&
20717 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20718 *FirstLoad->getMemOperand(), &IsFastLd) &&
20719 IsFastLd) {
20720 LastLegalVectorType = i + 1;
20721 }
20722
20723 // Find a legal type for the integer store.
20724 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20725 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20726 if (TLI.isTypeLegal(StoreTy) &&
20727 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20728 DAG.getMachineFunction()) &&
20729 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20730 *FirstInChain->getMemOperand(), &IsFastSt) &&
20731 IsFastSt &&
20732 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20733 *FirstLoad->getMemOperand(), &IsFastLd) &&
20734 IsFastLd) {
20735 LastLegalIntegerType = i + 1;
20736 DoIntegerTruncate = false;
20737 // Or check whether a truncstore and an extload are legal.
20738 } else if (TLI.getTypeAction(Context, StoreTy) ==
20739 TargetLowering::TypePromoteInteger) {
20740 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20741 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20742 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20743 DAG.getMachineFunction()) &&
20744 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20745 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20746 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20747 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20748 *FirstInChain->getMemOperand(), &IsFastSt) &&
20749 IsFastSt &&
20750 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20751 *FirstLoad->getMemOperand(), &IsFastLd) &&
20752 IsFastLd) {
20753 LastLegalIntegerType = i + 1;
20754 DoIntegerTruncate = true;
20755 }
20756 }
20757 }
20758
20759 // Only use vector types if the vector type is larger than the integer
20760 // type. If they are the same, use integers.
20761 bool UseVectorTy =
20762 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20763 unsigned LastLegalType =
20764 std::max(LastLegalVectorType, LastLegalIntegerType);
20765
20766 // We add +1 here because the LastXXX variables refer to an index (location)
20767 // while NumElem refers to a count (array size).
20768 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20769 NumElem = std::min(LastLegalType, NumElem);
20770 Align FirstLoadAlign = FirstLoad->getAlign();
20771
20772 if (NumElem < 2) {
20773 // We know that candidate stores are in order and of correct
20774 // shape. While there is no mergeable sequence starting from the
20775 // beginning, one may start later in the sequence. The only
20776 // reason a merge of size N could have failed where another of
20777 // the same size would not have is if the alignment of either
20778 // the load or store has improved. Drop as many candidates as we
20779 // can here.
20780 unsigned NumSkip = 1;
20781 while ((NumSkip < LoadNodes.size()) &&
20782 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20783 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20784 NumSkip++;
20785 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20786 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20787 NumConsecutiveStores -= NumSkip;
20788 continue;
20789 }
20790
20791 // Check that we can merge these candidates without causing a cycle.
20792 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20793 RootNode)) {
20794 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20795 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20796 NumConsecutiveStores -= NumElem;
20797 continue;
20798 }
20799
20800 // Find if it is better to use vectors or integers to load and store
20801 // to memory.
20802 EVT JointMemOpVT;
20803 if (UseVectorTy) {
20804 // Find a legal type for the vector store.
20805 unsigned Elts = NumElem * NumMemElts;
20806 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20807 } else {
20808 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20809 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20810 }
20811
20812 SDLoc LoadDL(LoadNodes[0].MemNode);
20813 SDLoc StoreDL(StoreNodes[0].MemNode);
20814
20815 // The merged loads are required to have the same incoming chain, so
20816 // using the first's chain is acceptable.
20817
20818 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20819 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20820 AddToWorklist(NewStoreChain.getNode());
20821
20822 MachineMemOperand::Flags LdMMOFlags =
20823 isDereferenceable ? MachineMemOperand::MODereferenceable
20824 : MachineMemOperand::MONone;
20825 if (IsNonTemporalLoad)
20826 LdMMOFlags |= MachineMemOperand::MONonTemporal;
20827
20828 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20829
20830 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20831 ? MachineMemOperand::MONonTemporal
20832 : MachineMemOperand::MONone;
20833
20834 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20835
20836 SDValue NewLoad, NewStore;
20837 if (UseVectorTy || !DoIntegerTruncate) {
20838 NewLoad = DAG.getLoad(
20839 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20840 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20841 SDValue StoreOp = NewLoad;
20842 if (NeedRotate) {
20843 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20844 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20845 "Unexpected type for rotate-able load pair");
20846 SDValue RotAmt =
20847 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20848 // Target can convert to the identical ROTR if it does not have ROTL.
20849 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20850 }
20851 NewStore = DAG.getStore(
20852 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20853 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20854 : MachinePointerInfo(FirstStoreAS),
20855 FirstStoreAlign, StMMOFlags);
20856 } else { // This must be the truncstore/extload case
20857 EVT ExtendedTy =
20858 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20859 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20860 FirstLoad->getChain(), FirstLoad->getBasePtr(),
20861 FirstLoad->getPointerInfo(), JointMemOpVT,
20862 FirstLoadAlign, LdMMOFlags);
20863 NewStore = DAG.getTruncStore(
20864 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20865 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20866 : MachinePointerInfo(FirstStoreAS),
20867 JointMemOpVT, FirstInChain->getAlign(),
20868 FirstInChain->getMemOperand()->getFlags());
20869 }
20870
20871 // Transfer chain users from old loads to the new load.
20872 for (unsigned i = 0; i < NumElem; ++i) {
20873 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20874 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20875 SDValue(NewLoad.getNode(), 1));
20876 }
20877
20878 // Replace all stores with the new store. Recursively remove corresponding
20879 // values if they are no longer used.
20880 for (unsigned i = 0; i < NumElem; ++i) {
20881 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20882 CombineTo(StoreNodes[i].MemNode, NewStore);
20883 if (Val->use_empty())
20884 recursivelyDeleteUnusedNodes(Val.getNode());
20885 }
20886
20887 MadeChange = true;
20888 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20889 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20890 NumConsecutiveStores -= NumElem;
20891 }
20892 return MadeChange;
20893}
20894
20895bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20896 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20897 return false;
20898
20899 // TODO: Extend this function to merge stores of scalable vectors.
20900 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20901 // store since we know <vscale x 16 x i8> is exactly twice as large as
20902 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20903 EVT MemVT = St->getMemoryVT();
20904 if (MemVT.isScalableVT())
20905 return false;
20906 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20907 return false;
20908
20909 // This function cannot currently deal with non-byte-sized memory sizes.
20910 int64_t ElementSizeBytes = MemVT.getStoreSize();
20911 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20912 return false;
20913
20914 // Do not bother looking at stored values that are not constants, loads, or
20915 // extracted vector elements.
20916 SDValue StoredVal = peekThroughBitcasts(St->getValue());
20917 const StoreSource StoreSrc = getStoreSource(StoredVal);
20918 if (StoreSrc == StoreSource::Unknown)
20919 return false;
20920
20921 SmallVector<MemOpLink, 8> StoreNodes;
20922 SDNode *RootNode;
20923 // Find potential store merge candidates by searching through chain sub-DAG
20924 getStoreMergeCandidates(St, StoreNodes, RootNode);
20925
20926 // Check if there is anything to merge.
20927 if (StoreNodes.size() < 2)
20928 return false;
20929
20930 // Sort the memory operands according to their distance from the
20931 // base pointer.
20932 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20933 return LHS.OffsetFromBase < RHS.OffsetFromBase;
20934 });
20935
20936 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20937 Attribute::NoImplicitFloat);
20938 bool IsNonTemporalStore = St->isNonTemporal();
20939 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20940 cast<LoadSDNode>(StoredVal)->isNonTemporal();
20941
20942 // Store merging attempts to merge the lowest-addressed stores first. This
20943 // generally works out, since the remaining stores are checked
20944 // after the first collection of stores is merged. However, in the
20945 // case that a non-mergeable store is found first, e.g., {p[-2],
20946 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
20947 // mergeable cases. To prevent this, we prune such stores from the
20948 // front of StoreNodes here.
20949 bool MadeChange = false;
20950 while (StoreNodes.size() > 1) {
20951 unsigned NumConsecutiveStores =
20952 getConsecutiveStores(StoreNodes, ElementSizeBytes);
20953 // There are no more stores in the list to examine.
20954 if (NumConsecutiveStores == 0)
20955 return MadeChange;
20956
20957 // We have at least 2 consecutive stores. Try to merge them.
20958 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20959 switch (StoreSrc) {
20960 case StoreSource::Constant:
20961 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20962 MemVT, RootNode, AllowVectors);
20963 break;
20964
20965 case StoreSource::Extract:
20966 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20967 MemVT, RootNode);
20968 break;
20969
20970 case StoreSource::Load:
20971 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20972 MemVT, RootNode, AllowVectors,
20973 IsNonTemporalStore, IsNonTemporalLoad);
20974 break;
20975
20976 default:
20977 llvm_unreachable("Unhandled store source type");
20978 }
20979 }
20980 return MadeChange;
20981}
20982
20983SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20984 SDLoc SL(ST);
20985 SDValue ReplStore;
20986
20987 // Replace the chain to avoid dependency.
20988 if (ST->isTruncatingStore()) {
20989 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20990 ST->getBasePtr(), ST->getMemoryVT(),
20991 ST->getMemOperand());
20992 } else {
20993 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20994 ST->getMemOperand());
20995 }
20996
20997 // Create token to keep both nodes around.
20998 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
20999 MVT::Other, ST->getChain(), ReplStore);
21000
21001 // Make sure the new and old chains are cleaned up.
21002 AddToWorklist(Token.getNode());
21003
21004 // Don't add users to work list.
21005 return CombineTo(ST, Token, false);
21006}
21007
21008SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21009 SDValue Value = ST->getValue();
21010 if (Value.getOpcode() == ISD::TargetConstantFP)
21011 return SDValue();
21012
21013 if (!ISD::isNormalStore(ST))
21014 return SDValue();
21015
21016 SDLoc DL(ST);
21017
21018 SDValue Chain = ST->getChain();
21019 SDValue Ptr = ST->getBasePtr();
21020
21021 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21022
21023 // NOTE: If the original store is volatile, this transform must not increase
21024 // the number of stores. For example, on x86-32 an f64 can be stored in one
21025 // processor operation but an i64 (which is not legal) requires two. So the
21026 // transform should not be done in this case.
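// Illustrative sketch: "store double 1.0, p" can become
// "store i64 0x3FF0000000000000, p" where i64 stores are available, or two
// i32 stores of 0x00000000 and 0x3FF00000 (order depending on endianness)
// otherwise.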
21027
21028 SDValue Tmp;
21029 switch (CFP->getSimpleValueType(0).SimpleTy) {
21030 default:
21031 llvm_unreachable("Unknown FP type");
21032 case MVT::f16: // We don't do this for these yet.
21033 case MVT::bf16:
21034 case MVT::f80:
21035 case MVT::f128:
21036 case MVT::ppcf128:
21037 return SDValue();
21038 case MVT::f32:
21039 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21040 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21041 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21042 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21043 MVT::i32);
21044 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21045 }
21046
21047 return SDValue();
21048 case MVT::f64:
21049 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21050 ST->isSimple()) ||
21051 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21052 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21053 getZExtValue(), SDLoc(CFP), MVT::i64);
21054 return DAG.getStore(Chain, DL, Tmp,
21055 Ptr, ST->getMemOperand());
21056 }
21057
21058 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21059 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21060 // Many FP stores are not made apparent until after legalize, e.g. for
21061 // argument passing. Since this is so common, custom legalize the
21062 // 64-bit integer store into two 32-bit stores.
21063 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21064 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21065 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21066 if (DAG.getDataLayout().isBigEndian())
21067 std::swap(Lo, Hi);
21068
21069 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21070 AAMDNodes AAInfo = ST->getAAInfo();
21071
21072 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21073 ST->getOriginalAlign(), MMOFlags, AAInfo);
21074 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21075 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21076 ST->getPointerInfo().getWithOffset(4),
21077 ST->getOriginalAlign(), MMOFlags, AAInfo);
21078 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21079 St0, St1);
21080 }
21081
21082 return SDValue();
21083 }
21084}
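// Illustrative examples of the constant rewrites above (assuming the integer
// store types are legal and a little-endian layout for the split case):
//   store float 1.0, ptr   -->  store i32 0x3F800000, ptr
//   store double 1.0, ptr  -->  store i64 0x3FF0000000000000, ptr
// or, when i64 stores are not legal and the f64 immediate is not legal either:
//   store double 1.0, ptr  -->  store i32 0x00000000, ptr
//                               store i32 0x3FF00000, ptr+4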
21085
21086// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21087//
21088// If a load is used only to have a single element replaced and then stored
21089// back to the same address, with no other uses in between on the chain,
21090// the vector store can be replaced with a store of just that scalar element.
21091SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21092 SDLoc DL(ST);
21093 SDValue Value = ST->getValue();
21094 SDValue Ptr = ST->getBasePtr();
21095 SDValue Chain = ST->getChain();
21096 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21097 return SDValue();
21098
21099 SDValue Elt = Value.getOperand(1);
21100 SDValue Idx = Value.getOperand(2);
21101
21102 // If the element isn't byte sized or is implicitly truncated then we can't
21103 // compute an offset.
21104 EVT EltVT = Elt.getValueType();
21105 if (!EltVT.isByteSized() ||
21106 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21107 return SDValue();
21108
21109 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21110 if (!Ld || Ld->getBasePtr() != Ptr ||
21111 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21112 !ISD::isNormalStore(ST) ||
21113 Ld->getAddressSpace() != ST->getAddressSpace() ||
21114 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21115 return SDValue();
21116
21117 unsigned IsFast;
21118 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21119 Elt.getValueType(), ST->getAddressSpace(),
21120 ST->getAlign(), ST->getMemOperand()->getFlags(),
21121 &IsFast) ||
21122 !IsFast)
21123 return SDValue();
21124
21125 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21126
21127 // If the offset is a known constant then try to recover the pointer
21128 // info
21129 SDValue NewPtr;
21130 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21131 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21132 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21133 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21134 } else {
21135 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21136 }
21137
21138 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21139 ST->getMemOperand()->getFlags());
21140}
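// Illustrative example with <4 x i32> and a constant index of 2:
//   (store (insert_vector_elt (load ptr), x, 2), ptr)
//     --> (store i32 x, ptr + 8)
// Only the modified lane is written; the rest of the vector already holds the
// loaded values.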
21141
21142SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21143 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21144 SDValue Val = ST->getVal();
21145 EVT VT = Val.getValueType();
21146 EVT MemVT = ST->getMemoryVT();
21147
21148 if (MemVT.bitsLT(VT)) { // Is truncating store
21149 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21150 MemVT.getScalarSizeInBits());
21151 // See if we can simplify the operation with SimplifyDemandedBits, which
21152 // only works if the value has a single use.
21153 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21154 return SDValue(N, 0);
21155 }
21156
21157 return SDValue();
21158}
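// Illustrative example: a truncating atomic store only demands the low MemVT
// bits, so a redundant mask of the stored value can be dropped, e.g.
//   atomic_store i8 (and i32 x, 255), ptr  -->  atomic_store i8 x, ptr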
21159
21160SDValue DAGCombiner::visitSTORE(SDNode *N) {
21161 StoreSDNode *ST = cast<StoreSDNode>(N);
21162 SDValue Chain = ST->getChain();
21163 SDValue Value = ST->getValue();
21164 SDValue Ptr = ST->getBasePtr();
21165
21166 // If this is a store of a bit convert, store the input value if the
21167 // resultant store does not need a higher alignment than the original.
21168 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21169 ST->isUnindexed()) {
21170 EVT SVT = Value.getOperand(0).getValueType();
21171 // If the store is volatile, we only want to change the store type if the
21172 // resulting store is legal. Otherwise we might increase the number of
21173 // memory accesses. We don't care if the original type was legal or not
21174 // as we assume software couldn't rely on the number of accesses of an
21175 // illegal type.
21176 // TODO: May be able to relax for unordered atomics (see D66309)
21177 if (((!LegalOperations && ST->isSimple()) ||
21178 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21179 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21180 DAG, *ST->getMemOperand())) {
21181 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21182 ST->getMemOperand());
21183 }
21184 }
21185
21186 // Turn 'store undef, Ptr' -> nothing.
21187 if (Value.isUndef() && ST->isUnindexed())
21188 return Chain;
21189
21190 // Try to infer better alignment information than the store already has.
21191 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21192 !ST->isAtomic()) {
21193 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21194 if (*Alignment > ST->getAlign() &&
21195 isAligned(*Alignment, ST->getSrcValueOffset())) {
21196 SDValue NewStore =
21197 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21198 ST->getMemoryVT(), *Alignment,
21199 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21200 // NewStore will always be N as we are only refining the alignment
21201 assert(NewStore.getNode() == N);
21202 (void)NewStore;
21203 }
21204 }
21205 }
21206
21207 // Try transforming a pair floating point load / store ops to integer
21208 // load / store ops.
21209 if (SDValue NewST = TransformFPLoadStorePair(N))
21210 return NewST;
21211
21212 // Try transforming several stores into STORE (BSWAP).
21213 if (SDValue Store = mergeTruncStores(ST))
21214 return Store;
21215
21216 if (ST->isUnindexed()) {
21217 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21218 // adjacent stores.
21219 if (findBetterNeighborChains(ST)) {
21220 // replaceStoreChain uses CombineTo, which handles all of the worklist
21221 // manipulation. Return the original node to not do anything else.
21222 return SDValue(ST, 0);
21223 }
21224 Chain = ST->getChain();
21225 }
21226
21227 // FIXME: is there such a thing as a truncating indexed store?
21228 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21229 Value.getValueType().isInteger() &&
21230 (!isa<ConstantSDNode>(Value) ||
21231 !cast<ConstantSDNode>(Value)->isOpaque())) {
21232 // Convert a truncating store of an extension into a standard store.
21233 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21234 Value.getOpcode() == ISD::SIGN_EXTEND ||
21235 Value.getOpcode() == ISD::ANY_EXTEND) &&
21236 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21237 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21238 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21239 ST->getMemOperand());
21240
21241 APInt TruncDemandedBits =
21242 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21243 ST->getMemoryVT().getScalarSizeInBits());
21244
21245 // See if we can simplify the operation with SimplifyDemandedBits, which
21246 // only works if the value has a single use.
21247 AddToWorklist(Value.getNode());
21248 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21249 // Re-visit the store if anything changed and the store hasn't been merged
21250 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21251 // node back to the worklist if necessary, but we also need to re-visit
21252 // the Store node itself.
21253 if (N->getOpcode() != ISD::DELETED_NODE)
21254 AddToWorklist(N);
21255 return SDValue(N, 0);
21256 }
21257
21258 // Otherwise, see if we can simplify the input to this truncstore with
21259 // knowledge that only the low bits are being used. For example:
21260 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21261 if (SDValue Shorter =
21262 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21263 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21264 ST->getMemOperand());
21265
21266 // If we're storing a truncated constant, see if we can simplify it.
21267 // TODO: Move this to targetShrinkDemandedConstant?
21268 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21269 if (!Cst->isOpaque()) {
21270 const APInt &CValue = Cst->getAPIntValue();
21271 APInt NewVal = CValue & TruncDemandedBits;
21272 if (NewVal != CValue) {
21273 SDValue Shorter =
21274 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21275 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21276 ST->getMemoryVT(), ST->getMemOperand());
21277 }
21278 }
21279 }
21280
21281 // If this is a load followed by a store to the same location, then the store
21282 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21283 // TODO: Add big-endian truncate support with test coverage.
21284 // TODO: Can relax for unordered atomics (see D66309)
21285 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21286 ? peekThroughTruncates(Value)
21287 : Value;
21288 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21289 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21290 ST->isUnindexed() && ST->isSimple() &&
21291 Ld->getAddressSpace() == ST->getAddressSpace() &&
21292 // There can't be any side effects between the load and store, such as
21293 // a call or store.
21294 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21295 // The store is dead, remove it.
21296 return Chain;
21297 }
21298 }
21299
21300 // Try scalarizing vector stores of loads where we only change one element
21301 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21302 return NewST;
21303
21304 // TODO: Can relax for unordered atomics (see D66309)
21305 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21306 if (ST->isUnindexed() && ST->isSimple() &&
21307 ST1->isUnindexed() && ST1->isSimple()) {
21308 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21309 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21310 ST->getAddressSpace() == ST1->getAddressSpace()) {
21311 // If this is a store followed by a store with the same value to the
21312 // same location, then the store is dead/noop.
21313 return Chain;
21314 }
21315
21316 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21317 !ST1->getBasePtr().isUndef() &&
21318 ST->getAddressSpace() == ST1->getAddressSpace()) {
21319 // If one of the two stores is of a scalable vector type and the other
21320 // is a larger store of a fixed type, we cannot remove the scalable
21321 // store, because its actual size is not known until runtime; only
21322 // remove it when its size is provably no larger than the later store's.
21323 if (ST->getMemoryVT().isScalableVector() ||
21324 ST1->getMemoryVT().isScalableVector()) {
21325 if (ST1->getBasePtr() == Ptr &&
21326 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21327 ST->getMemoryVT().getStoreSize())) {
21328 CombineTo(ST1, ST1->getChain());
21329 return SDValue(N, 0);
21330 }
21331 } else {
21332 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21333 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21334 // If the preceding store wrote to a subset of the current store's
21335 // location and no other node is chained to that store, we can
21336 // effectively drop the preceding store. Do not remove stores to undef
21337 // as they may be used as data sinks.
21338 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21339 ChainBase,
21340 ST1->getMemoryVT().getFixedSizeInBits())) {
21341 CombineTo(ST1, ST1->getChain());
21342 return SDValue(N, 0);
21343 }
21344 }
21345 }
21346 }
21347 }
21348
21349 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21350 // truncating store. We can do this even if this is already a truncstore.
21351 if ((Value.getOpcode() == ISD::FP_ROUND ||
21352 Value.getOpcode() == ISD::TRUNCATE) &&
21353 Value->hasOneUse() && ST->isUnindexed() &&
21354 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21355 ST->getMemoryVT(), LegalOperations)) {
21356 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21357 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21358 }
21359
21360 // Always perform this optimization before types are legal. If the target
21361 // prefers, also try this after legalization to catch stores that were created
21362 // by intrinsics or other nodes.
21363 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21364 while (true) {
21365 // There can be multiple store sequences on the same chain.
21366 // Keep trying to merge store sequences until we are unable to do so
21367 // or until we merge the last store on the chain.
21368 bool Changed = mergeConsecutiveStores(ST);
21369 if (!Changed) break;
21370 // Return N as merge only uses CombineTo and no worklist clean
21371 // up is necessary.
21372 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21373 return SDValue(N, 0);
21374 }
21375 }
21376
21377 // Try transforming N to an indexed store.
21378 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21379 return SDValue(N, 0);
21380
21381 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21382 //
21383 // Make sure to do this only after attempting to merge stores in order to
21384 // avoid changing the types of some subset of stores due to visit order,
21385 // preventing their merging.
21386 if (isa<ConstantFPSDNode>(ST->getValue())) {
21387 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21388 return NewSt;
21389 }
21390
21391 if (SDValue NewSt = splitMergedValStore(ST))
21392 return NewSt;
21393
21394 return ReduceLoadOpStoreWidth(N);
21395}
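// Illustrative examples of the dead/no-op store cases handled above:
//   store undef, ptr                           --> removed
//   x = load ptr; store x, ptr                 --> store removed
//   store a, ptr; store a, ptr (same type)     --> second store removed
//   store a, ptr; store b, ptr (b covers a)    --> first store removed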
21396
21397SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21398 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21399 if (!LifetimeEnd->hasOffset())
21400 return SDValue();
21401
21402 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21403 LifetimeEnd->getOffset(), false);
21404
21405 // We walk up the chains to find stores.
21406 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21407 while (!Chains.empty()) {
21408 SDValue Chain = Chains.pop_back_val();
21409 if (!Chain.hasOneUse())
21410 continue;
21411 switch (Chain.getOpcode()) {
21412 case ISD::TokenFactor:
21413 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21414 Chains.push_back(Chain.getOperand(--Nops));
21415 break;
21416 case ISD::LIFETIME_START:
21417 case ISD::LIFETIME_END:
21418 // We can forward past any lifetime start/end that can be proven not to
21419 // alias the node.
21420 if (!mayAlias(Chain.getNode(), N))
21421 Chains.push_back(Chain.getOperand(0));
21422 break;
21423 case ISD::STORE: {
21424 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21425 // TODO: Can relax for unordered atomics (see D66309)
21426 if (!ST->isSimple() || ST->isIndexed())
21427 continue;
21428 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21429 // The bounds of a scalable store are not known until runtime, so this
21430 // store cannot be elided.
21431 if (StoreSize.isScalable())
21432 continue;
21433 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21434 // If we store purely within object bounds just before its lifetime ends,
21435 // we can remove the store.
21436 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21437 StoreSize.getFixedValue() * 8)) {
21438 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21439 dbgs() << "\nwithin LIFETIME_END of : ";
21440 LifetimeEndBase.dump(); dbgs() << "\n");
21441 CombineTo(ST, ST->getChain());
21442 return SDValue(N, 0);
21443 }
21444 }
21445 }
21446 }
21447 return SDValue();
21448}
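// Illustrative example: a store that lies entirely within an object whose
// lifetime ends immediately afterwards can never be observed, e.g.
//   store i32 v, (alloca + 4); lifetime.end(8, alloca)  --> store removed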
21449
21450/// For the instruction sequence of store below, F and I values
21451/// are bundled together as an i64 value before being stored into memory.
21452/// Sometimes it is more efficient to generate separate stores for F and I,
21453/// which can remove the bitwise instructions or sink them to colder places.
21454///
21455/// (store (or (zext (bitcast F to i32) to i64),
21456/// (shl (zext I to i64), 32)), addr) -->
21457/// (store F, addr) and (store I, addr+4)
21458///
21459/// Similarly, splitting for other merged store can also be beneficial, like:
21460/// For pair of {i32, i32}, i64 store --> two i32 stores.
21461/// For pair of {i32, i16}, i64 store --> two i32 stores.
21462/// For pair of {i16, i16}, i32 store --> two i16 stores.
21463/// For pair of {i16, i8}, i32 store --> two i16 stores.
21464/// For pair of {i8, i8}, i16 store --> two i8 stores.
21465///
21466/// We allow each target to determine specifically which kind of splitting is
21467/// supported.
21468///
21469/// The store patterns are commonly seen from the simple code snippet below
21470/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21471/// void goo(const std::pair<int, float> &);
21472/// hoo() {
21473/// ...
21474/// goo(std::make_pair(tmp, ftmp));
21475/// ...
21476/// }
21477///
21478SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21479 if (OptLevel == CodeGenOptLevel::None)
21480 return SDValue();
21481
21482 // Can't change the number of memory accesses for a volatile store or break
21483 // atomicity for an atomic one.
21484 if (!ST->isSimple())
21485 return SDValue();
21486
21487 SDValue Val = ST->getValue();
21488 SDLoc DL(ST);
21489
21490 // Match OR operand.
21491 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21492 return SDValue();
21493
21494 // Match SHL operand and get Lower and Higher parts of Val.
21495 SDValue Op1 = Val.getOperand(0);
21496 SDValue Op2 = Val.getOperand(1);
21497 SDValue Lo, Hi;
21498 if (Op1.getOpcode() != ISD::SHL) {
21499 std::swap(Op1, Op2);
21500 if (Op1.getOpcode() != ISD::SHL)
21501 return SDValue();
21502 }
21503 Lo = Op2;
21504 Hi = Op1.getOperand(0);
21505 if (!Op1.hasOneUse())
21506 return SDValue();
21507
21508 // Match shift amount to HalfValBitSize.
21509 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21510 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21511 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21512 return SDValue();
21513
21514 // Lo and Hi must be zero-extended from integer types whose width is at
21515 // most HalfValBitSize (e.g. i32 or narrower zero-extended to i64).
21516 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21517 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21518 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21519 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21520 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21521 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21522 return SDValue();
21523
21524 // Use the EVT of low and high parts before bitcast as the input
21525 // of target query.
21526 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21527 ? Lo.getOperand(0).getValueType()
21528 : Lo.getValueType();
21529 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21530 ? Hi.getOperand(0).getValueType()
21531 : Hi.getValueType();
21532 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21533 return SDValue();
21534
21535 // Start to split store.
21536 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21537 AAMDNodes AAInfo = ST->getAAInfo();
21538
21539 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21540 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21541 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21542 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21543
21544 SDValue Chain = ST->getChain();
21545 SDValue Ptr = ST->getBasePtr();
21546 // Lower value store.
21547 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21548 ST->getOriginalAlign(), MMOFlags, AAInfo);
21549 Ptr =
21550 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21551 // Higher value store.
21552 SDValue St1 = DAG.getStore(
21553 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21554 ST->getOriginalAlign(), MMOFlags, AAInfo);
21555 return St1;
21556}
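// Illustrative example for HalfValBitSize == 32 (assuming the target reports
// two i32 stores as cheaper than merging the bits):
//   store i64 (or (zext i32 lo to i64),
//                 (shl (zext i32 hi to i64), 32)), ptr
//     --> store i32 lo, ptr
//         store i32 hi, ptr + 4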
21557
21558// Merge an insertion into an existing shuffle:
21559// (insert_vector_elt (vector_shuffle X, Y, Mask),
21560// .(extract_vector_elt X, N), InsIndex)
21561// --> (vector_shuffle X, Y, NewMask)
21562// and variations where shuffle operands may be CONCAT_VECTORS.
21563static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21564 SmallVectorImpl<int> &NewMask, SDValue Elt,
21565 unsigned InsIndex) {
21566 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21567 !isa<ConstantSDNode>(Elt.getOperand(1)))
21568 return false;
21569
21570 // Vec's operand 0 is using indices from 0 to N-1 and
21571 // operand 1 from N to 2N - 1, where N is the number of
21572 // elements in the vectors.
21573 SDValue InsertVal0 = Elt.getOperand(0);
21574 int ElementOffset = -1;
21575
21576 // We explore the inputs of the shuffle in order to see if we find the
21577 // source of the extract_vector_elt. If so, we can use it to modify the
21578 // shuffle rather than perform an insert_vector_elt.
21579 SmallVector<std::pair<int, SDValue>> ArgWorkList;
21580 ArgWorkList.emplace_back(Mask.size(), Y);
21581 ArgWorkList.emplace_back(0, X);
21582
21583 while (!ArgWorkList.empty()) {
21584 int ArgOffset;
21585 SDValue ArgVal;
21586 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21587
21588 if (ArgVal == InsertVal0) {
21589 ElementOffset = ArgOffset;
21590 break;
21591 }
21592
21593 // Peek through concat_vector.
21594 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21595 int CurrentArgOffset =
21596 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21597 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21598 for (SDValue Op : reverse(ArgVal->ops())) {
21599 CurrentArgOffset -= Step;
21600 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21601 }
21602
21603 // Make sure we went through all the elements and did not screw up index
21604 // computation.
21605 assert(CurrentArgOffset == ArgOffset);
21606 }
21607 }
21608
21609 // If we failed to find a match, see if we can replace an UNDEF shuffle
21610 // operand.
21611 if (ElementOffset == -1) {
21612 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21613 return false;
21614 ElementOffset = Mask.size();
21615 Y = InsertVal0;
21616 }
21617
21618 NewMask.assign(Mask.begin(), Mask.end());
21619 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21620 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21621 "NewMask[InsIndex] is out of bound");
21622 return true;
21623}
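// Illustrative example with a 4-element mask: inserting X[2] into lane 1 of
// an existing shuffle of X and Y only needs a mask update,
//   insert_vector_elt (vector_shuffle X, Y, <0,1,4,5>),
//                     (extract_vector_elt X, 2), 1
//     --> vector_shuffle X, Y, <0,2,4,5>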
21624
21625// Merge an insertion into an existing shuffle:
21626// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21627// InsIndex)
21628// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21629// CONCAT_VECTORS.
21630SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21631 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21632 "Expected insert_vector_elt");
21633 SDValue InsertVal = N->getOperand(1);
21634 SDValue Vec = N->getOperand(0);
21635
21636 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21637 if (!SVN || !Vec.hasOneUse())
21638 return SDValue();
21639
21640 ArrayRef<int> Mask = SVN->getMask();
21641 SDValue X = Vec.getOperand(0);
21642 SDValue Y = Vec.getOperand(1);
21643
21644 SmallVector<int, 16> NewMask(Mask);
21645 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21646 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21647 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21648 if (LegalShuffle)
21649 return LegalShuffle;
21650 }
21651
21652 return SDValue();
21653}
21654
21655// Convert a disguised subvector insertion into a shuffle:
21656// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21657// bitcast(shuffle (bitcast V), (extended X), Mask)
21658// Note: We do not use an insert_subvector node because that requires a
21659// legal subvector type.
21660SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21661 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21662 "Expected insert_vector_elt");
21663 SDValue InsertVal = N->getOperand(1);
21664
21665 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21666 !InsertVal.getOperand(0).getValueType().isVector())
21667 return SDValue();
21668
21669 SDValue SubVec = InsertVal.getOperand(0);
21670 SDValue DestVec = N->getOperand(0);
21671 EVT SubVecVT = SubVec.getValueType();
21672 EVT VT = DestVec.getValueType();
21673 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21674 // If the source has only a single vector element, the cost of creating a
21675 // vector from it is likely to exceed the cost of an insert_vector_elt.
21676 if (NumSrcElts == 1)
21677 return SDValue();
21678 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21679 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21680
21681 // Step 1: Create a shuffle mask that implements this insert operation. The
21682 // vector that we are inserting into will be operand 0 of the shuffle, so
21683 // those elements are just 'i'. The inserted subvector is in the first
21684 // positions of operand 1 of the shuffle. Example:
21685 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21686 SmallVector<int, 16> Mask(NumMaskVals);
21687 for (unsigned i = 0; i != NumMaskVals; ++i) {
21688 if (i / NumSrcElts == InsIndex)
21689 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21690 else
21691 Mask[i] = i;
21692 }
21693
21694 // Bail out if the target can not handle the shuffle we want to create.
21695 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21696 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21697 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21698 return SDValue();
21699
21700 // Step 2: Create a wide vector from the inserted source vector by appending
21701 // undefined elements. This is the same size as our destination vector.
21702 SDLoc DL(N);
21703 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21704 ConcatOps[0] = SubVec;
21705 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21706
21707 // Step 3: Shuffle in the padded subvector.
21708 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21709 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21710 AddToWorklist(PaddedSubV.getNode());
21711 AddToWorklist(DestVecBC.getNode());
21712 AddToWorklist(Shuf.getNode());
21713 return DAG.getBitcast(VT, Shuf);
21714}
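// Working through the example above: with NumSrcElts = 2 and ExtendRatio = 4,
// NumMaskVals = 8; lanes 4 and 5 satisfy i / NumSrcElts == InsIndex and pick
// elements 8 and 9 from the padded subvector, while every other lane keeps its
// own index, giving the mask {0,1,2,3,8,9,6,7}.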
21715
21716// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21717// possible and the new load will be fast. We use more loads but fewer shuffles
21718// and inserts.
21719SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21720 EVT VT = N->getValueType(0);
21721
21722 // InsIndex is expected to be the first or last lane.
21723 if (!VT.isFixedLengthVector() ||
21724 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21725 return SDValue();
21726
21727 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21728 // depending on the InsIndex.
21729 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21730 SDValue Scalar = N->getOperand(1);
21731 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21732 return InsIndex == P.index() || P.value() < 0 ||
21733 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21734 (InsIndex == VT.getVectorNumElements() - 1 &&
21735 P.value() == (int)P.index() + 1);
21736 }))
21737 return SDValue();
21738
21739 // We optionally skip over an extend so long as both loads are extended in the
21740 // same way from the same type.
21741 unsigned Extend = 0;
21742 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21743 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21744 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21745 Extend = Scalar.getOpcode();
21746 Scalar = Scalar.getOperand(0);
21747 }
21748
21749 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21750 if (!ScalarLoad)
21751 return SDValue();
21752
21753 SDValue Vec = Shuffle->getOperand(0);
21754 if (Extend) {
21755 if (Vec.getOpcode() != Extend)
21756 return SDValue();
21757 Vec = Vec.getOperand(0);
21758 }
21759 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21760 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21761 return SDValue();
21762
21763 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21764 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21765 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21766 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21767 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21768 return SDValue();
21769
21770 // Check that the offset between the pointers allows them to be combined
21771 // into a single contiguous load.
21772 if (InsIndex == 0) {
21773 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21774 -1))
21775 return SDValue();
21776 } else {
21778 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21779 return SDValue();
21780 }
21781
21782 // And that the new unaligned load will be fast.
21783 unsigned IsFast = 0;
21784 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21785 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21786 Vec.getValueType(), VecLoad->getAddressSpace(),
21787 NewAlign, VecLoad->getMemOperand()->getFlags(),
21788 &IsFast) ||
21789 !IsFast)
21790 return SDValue();
21791
21792 // Calculate the new Ptr and create the new load.
21793 SDLoc DL(N);
21794 SDValue Ptr = ScalarLoad->getBasePtr();
21795 if (InsIndex != 0)
21796 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21797 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21798 MachinePointerInfo PtrInfo =
21799 InsIndex == 0 ? ScalarLoad->getPointerInfo()
21800 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21801
21802 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21803 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21804 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21805 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21806 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21807}
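// Illustrative example for InsIndex == 0 with <4 x i32>:
//   insert (shuffle (load ptr), <u,0,1,2>), (load ptr - 4), 0
//     --> load <4 x i32>, ptr - 4
// assuming the two loads are consecutive in memory and the resulting
// unaligned load is fast on the target.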
21808
21809SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21810 SDValue InVec = N->getOperand(0);
21811 SDValue InVal = N->getOperand(1);
21812 SDValue EltNo = N->getOperand(2);
21813 SDLoc DL(N);
21814
21815 EVT VT = InVec.getValueType();
21816 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21817
21818 // Insert into out-of-bounds element is undefined.
21819 if (IndexC && VT.isFixedLengthVector() &&
21820 IndexC->getZExtValue() >= VT.getVectorNumElements())
21821 return DAG.getUNDEF(VT);
21822
21823 // Remove redundant insertions:
21824 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21825 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21826 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21827 return InVec;
21828
21829 if (!IndexC) {
21830 // If this is variable insert to undef vector, it might be better to splat:
21831 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21832 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21833 return DAG.getSplat(VT, DL, InVal);
21834 return SDValue();
21835 }
21836
21837 if (VT.isScalableVector())
21838 return SDValue();
21839
21840 unsigned NumElts = VT.getVectorNumElements();
21841
21842 // We must know which element is being inserted for folds below here.
21843 unsigned Elt = IndexC->getZExtValue();
21844
21845 // Handle <1 x ???> vector insertion special cases.
21846 if (NumElts == 1) {
21847 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21848 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21849 InVal.getOperand(0).getValueType() == VT &&
21850 isNullConstant(InVal.getOperand(1)))
21851 return InVal.getOperand(0);
21852 }
21853
21854 // Canonicalize insert_vector_elt dag nodes.
21855 // Example:
21856 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21857 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21858 //
21859 // Do this only if the child insert_vector node has one use; also
21860 // do this only if indices are both constants and Idx1 < Idx0.
21861 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21862 && isa<ConstantSDNode>(InVec.getOperand(2))) {
21863 unsigned OtherElt = InVec.getConstantOperandVal(2);
21864 if (Elt < OtherElt) {
21865 // Swap nodes.
21866 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21867 InVec.getOperand(0), InVal, EltNo);
21868 AddToWorklist(NewOp.getNode());
21869 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21870 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21871 }
21872 }
21873
21874 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21875 return Shuf;
21876
21877 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21878 return Shuf;
21879
21880 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21881 return Shuf;
21882
21883 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
21884 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
21885 // vXi1 vector - we don't need to recurse.
21886 if (NumElts == 1)
21887 return DAG.getBuildVector(VT, DL, {InVal});
21888
21889 // If we haven't already collected the element, insert into the op list.
21890 EVT MaxEltVT = InVal.getValueType();
21891 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21892 unsigned Idx) {
21893 if (!Ops[Idx]) {
21894 Ops[Idx] = Elt;
21895 if (VT.isInteger()) {
21896 EVT EltVT = Elt.getValueType();
21897 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21898 }
21899 }
21900 };
21901
21902 // Ensure all the operands are the same value type, fill any missing
21903 // operands with UNDEF and create the BUILD_VECTOR.
21904 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21905 assert(Ops.size() == NumElts && "Unexpected vector size");
21906 for (SDValue &Op : Ops) {
21907 if (Op)
21908 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21909 else
21910 Op = DAG.getUNDEF(MaxEltVT);
21911 }
21912 return DAG.getBuildVector(VT, DL, Ops);
21913 };
21914
21915 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21916 Ops[Elt] = InVal;
21917
21918 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
21919 for (SDValue CurVec = InVec; CurVec;) {
21920 // UNDEF - build new BUILD_VECTOR from already inserted operands.
21921 if (CurVec.isUndef())
21922 return CanonicalizeBuildVector(Ops);
21923
21924 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21925 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21926 for (unsigned I = 0; I != NumElts; ++I)
21927 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21928 return CanonicalizeBuildVector(Ops);
21929 }
21930
21931 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21932 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21933 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21934 return CanonicalizeBuildVector(Ops);
21935 }
21936
21937 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21938 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21939 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21940 if (CurIdx->getAPIntValue().ult(NumElts)) {
21941 unsigned Idx = CurIdx->getZExtValue();
21942 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21943
21944 // Found entire BUILD_VECTOR.
21945 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21946 return CanonicalizeBuildVector(Ops);
21947
21948 CurVec = CurVec->getOperand(0);
21949 continue;
21950 }
21951
21952 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21953 // update the shuffle mask (and second operand if we started with unary
21954 // shuffle) and create a new legal shuffle.
21955 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21956 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21957 SDValue LHS = SVN->getOperand(0);
21958 SDValue RHS = SVN->getOperand(1);
21959 SmallVector<int, 16> Mask(SVN->getMask());
21960 bool Merged = true;
21961 for (auto I : enumerate(Ops)) {
21962 SDValue &Op = I.value();
21963 if (Op) {
21964 SmallVector<int, 16> NewMask;
21965 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21966 Merged = false;
21967 break;
21968 }
21969 Mask = std::move(NewMask);
21970 }
21971 }
21972 if (Merged)
21973 if (SDValue NewShuffle =
21974 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21975 return NewShuffle;
21976 }
21977
21978 // If all insertions are zero value, try to convert to AND mask.
21979 // TODO: Do this for -1 with OR mask?
21980 if (!LegalOperations && llvm::isNullConstant(InVal) &&
21981 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21982 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21983 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21984 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21985 SmallVector<SDValue, 8> Mask(NumElts);
21986 for (unsigned I = 0; I != NumElts; ++I)
21987 Mask[I] = Ops[I] ? Zero : AllOnes;
21988 return DAG.getNode(ISD::AND, DL, VT, CurVec,
21989 DAG.getBuildVector(VT, DL, Mask));
21990 }
21991
21992 // Failed to find a match in the chain - bail.
21993 break;
21994 }
21995
21996 // See if we can fill in the missing constant elements as zeros.
21997 // TODO: Should we do this for any constant?
21998 APInt DemandedZeroElts = APInt::getZero(NumElts);
21999 for (unsigned I = 0; I != NumElts; ++I)
22000 if (!Ops[I])
22001 DemandedZeroElts.setBit(I);
22002
22003 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22004 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22005 : DAG.getConstantFP(0, DL, MaxEltVT);
22006 for (unsigned I = 0; I != NumElts; ++I)
22007 if (!Ops[I])
22008 Ops[I] = Zero;
22009
22010 return CanonicalizeBuildVector(Ops);
22011 }
22012 }
22013
22014 return SDValue();
22015}
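// Illustrative examples of the chain walk above:
//   insert_vector_elt (insert_vector_elt undef, a, 0), b, 1
//     --> BUILD_VECTOR a, b
// and of the zero-insertion case (inserting 0 into lanes 1 and 3 of x):
//     --> and x, <-1, 0, -1, 0>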
22016
22017SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22018 SDValue EltNo,
22019 LoadSDNode *OriginalLoad) {
22020 assert(OriginalLoad->isSimple());
22021
22022 EVT ResultVT = EVE->getValueType(0);
22023 EVT VecEltVT = InVecVT.getVectorElementType();
22024
22025 // If the vector element type is not a multiple of a byte then we are unable
22026 // to correctly compute an address to load only the extracted element as a
22027 // scalar.
22028 if (!VecEltVT.isByteSized())
22029 return SDValue();
22030
22031 ISD::LoadExtType ExtTy =
22032 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22033 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22034 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22035 return SDValue();
22036
22037 Align Alignment = OriginalLoad->getAlign();
22038 MachinePointerInfo MPI;
22039 SDLoc DL(EVE);
22040 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22041 int Elt = ConstEltNo->getZExtValue();
22042 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22043 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22044 Alignment = commonAlignment(Alignment, PtrOff);
22045 } else {
22046 // Discard the pointer info except the address space because the memory
22047 // operand can't represent this new access since the offset is variable.
22048 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22049 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22050 }
22051
22052 unsigned IsFast = 0;
22053 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22054 OriginalLoad->getAddressSpace(), Alignment,
22055 OriginalLoad->getMemOperand()->getFlags(),
22056 &IsFast) ||
22057 !IsFast)
22058 return SDValue();
22059
22060 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22061 InVecVT, EltNo);
22062
22063 // We are replacing a vector load with a scalar load. The new load must have
22064 // identical memory op ordering to the original.
22065 SDValue Load;
22066 if (ResultVT.bitsGT(VecEltVT)) {
22067 // If the result type of vextract is wider than the load, then issue an
22068 // extending load instead.
22069 ISD::LoadExtType ExtType =
22070 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22071 : ISD::EXTLOAD;
22072 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22073 NewPtr, MPI, VecEltVT, Alignment,
22074 OriginalLoad->getMemOperand()->getFlags(),
22075 OriginalLoad->getAAInfo());
22076 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22077 } else {
22078 // The result type is narrower or the same width as the vector element
22079 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22080 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22081 OriginalLoad->getAAInfo());
22082 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22083 if (ResultVT.bitsLT(VecEltVT))
22084 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22085 else
22086 Load = DAG.getBitcast(ResultVT, Load);
22087 }
22088 ++OpsNarrowed;
22089 return Load;
22090}
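// Illustrative example with a constant index:
//   extract_vector_elt (load <4 x i32>, ptr), 2  -->  load i32, ptr + 8
// With a variable index the address comes from getVectorElementPointer and
// the pointer info is reduced to just the address space.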
22091
22092/// Transform a vector binary operation into a scalar binary operation by moving
22093/// the math/logic after an extract element of a vector.
22094static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22095 bool LegalOperations) {
22096 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22097 SDValue Vec = ExtElt->getOperand(0);
22098 SDValue Index = ExtElt->getOperand(1);
22099 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22100 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22101 Vec->getNumValues() != 1)
22102 return SDValue();
22103
22104 // Targets may want to avoid this to prevent an expensive register transfer.
22105 if (!TLI.shouldScalarizeBinop(Vec))
22106 return SDValue();
22107
22108 // Extracting an element of a vector constant is constant-folded, so this
22109 // transform is just replacing a vector op with a scalar op while moving the
22110 // extract.
22111 SDValue Op0 = Vec.getOperand(0);
22112 SDValue Op1 = Vec.getOperand(1);
22113 APInt SplatVal;
22114 if (isAnyConstantBuildVector(Op0, true) ||
22115 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22116 isAnyConstantBuildVector(Op1, true) ||
22117 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22118 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22119 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22120 SDLoc DL(ExtElt);
22121 EVT VT = ExtElt->getValueType(0);
22122 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22123 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22124 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22125 }
22126
22127 return SDValue();
22128}
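// Illustrative example: when one operand is a constant vector, the binop can
// be done on the extracted lane instead, e.g.
//   extract_vector_elt (add X, <1,2,3,4>), 2
//     --> add (extract_vector_elt X, 2), (extract_vector_elt <1,2,3,4>, 2)
// and the second extract constant-folds to 3.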
22129
22130// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22131// recursively analyse all of its users and try to model them as
22132// bit sequence extractions. If all of them agree on the new, narrower element
22133// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22134// new element type, do so now.
22135// This is mainly useful to recover from legalization that scalarized
22136// the vector as wide elements; this combine tries to rebuild it with narrower elements.
22137//
22138// Some more nodes could be modelled if that helps cover interesting patterns.
22139bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22140 SDNode *N) {
22141 // We perform this optimization post type-legalization because
22142 // the type-legalizer often scalarizes integer-promoted vectors.
22143 // Performing this optimization earlier may cause legalization cycles.
22144 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22145 return false;
22146
22147 // TODO: Add support for big-endian.
22148 if (DAG.getDataLayout().isBigEndian())
22149 return false;
22150
22151 SDValue VecOp = N->getOperand(0);
22152 EVT VecVT = VecOp.getValueType();
22153 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22154
22155 // We must start with a constant extraction index.
22156 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22157 if (!IndexC)
22158 return false;
22159
22160 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22161 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
22162
22163 // TODO: deal with the case of implicit anyext of the extraction.
22164 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22165 EVT ScalarVT = N->getValueType(0);
22166 if (VecVT.getScalarType() != ScalarVT)
22167 return false;
22168
22169 // TODO: deal with the cases other than everything being integer-typed.
22170 if (!ScalarVT.isScalarInteger())
22171 return false;
22172
22173 struct Entry {
22174 SDNode *Producer;
22175
22176 // Which bits of VecOp does it contain?
22177 unsigned BitPos;
22178 int NumBits;
22179 // NOTE: the actual width of \p Producer may be wider than NumBits!
22180
22181 Entry(Entry &&) = default;
22182 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22183 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22184
22185 Entry() = delete;
22186 Entry(const Entry &) = delete;
22187 Entry &operator=(const Entry &) = delete;
22188 Entry &operator=(Entry &&) = delete;
22189 };
22190 SmallVector<Entry, 32> Worklist;
22191 SmallVector<Entry, 32> Leafs;
22192
22193 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22194 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22195 /*NumBits=*/VecEltBitWidth);
22196
22197 while (!Worklist.empty()) {
22198 Entry E = Worklist.pop_back_val();
22199 // Does the node not even use any of the VecOp bits?
22200 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22201 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22202 return false; // Let's allow the other combines to clean this up first.
22203 // Did we fail to model any of the users of the Producer?
22204 bool ProducerIsLeaf = false;
22205 // Look at each user of this Producer.
22206 for (SDNode *User : E.Producer->uses()) {
22207 switch (User->getOpcode()) {
22208 // TODO: support ISD::BITCAST
22209 // TODO: support ISD::ANY_EXTEND
22210 // TODO: support ISD::ZERO_EXTEND
22211 // TODO: support ISD::SIGN_EXTEND
22212 case ISD::TRUNCATE:
22213 // Truncation simply means we keep position, but extract less bits.
22214 Worklist.emplace_back(User, E.BitPos,
22215 /*NumBits=*/User->getValueSizeInBits(0));
22216 break;
22217 // TODO: support ISD::SRA
22218 // TODO: support ISD::SHL
22219 case ISD::SRL:
22220 // We should be shifting the Producer by a constant amount.
22221 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22222 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22223 // Logical right-shift means that we start extraction later,
22224 // but stop it at the same position we did previously.
22225 unsigned ShAmt = ShAmtC->getZExtValue();
22226 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22227 break;
22228 }
22229 [[fallthrough]];
22230 default:
22231 // We can not model this user of the Producer.
22232 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22233 ProducerIsLeaf = true;
22234 // Profitability check: all users that we can not model
22235 // must be ISD::BUILD_VECTOR's.
22236 if (User->getOpcode() != ISD::BUILD_VECTOR)
22237 return false;
22238 break;
22239 }
22240 }
22241 if (ProducerIsLeaf)
22242 Leafs.emplace_back(std::move(E));
22243 }
22244
22245 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22246
22247 // If we are still at the same element granularity, give up.
22248 if (NewVecEltBitWidth == VecEltBitWidth)
22249 return false;
22250
22251 // The vector width must be a multiple of the new element width.
22252 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22253 return false;
22254
22255 // All leafs must agree on the new element width.
22256 // All leafs must not expect any "padding" bits on top of that width.
22257 // All leafs must start extraction from multiple of that width.
22258 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22259 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22260 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22261 E.BitPos % NewVecEltBitWidth == 0;
22262 }))
22263 return false;
22264
22265 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22266 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22267 VecVT.getSizeInBits() / NewVecEltBitWidth);
22268
22269 if (LegalTypes &&
22270 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22271 return false;
22272
22273 if (LegalOperations &&
22274 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22275 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22276 return false;
22277
22278 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22279 for (const Entry &E : Leafs) {
22280 SDLoc DL(E.Producer);
22281 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22282 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22283 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22284 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22285 DAG.getVectorIdxConstant(NewIndex, DL));
22286 CombineTo(E.Producer, V);
22287 }
22288
22289 return true;
22290}
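// Illustrative example for a little-endian target with VecOp of type v2i64:
//   t = extract_vector_elt v2i64 V, 0
//   a = trunc t to i32
//   b = trunc (srl t, 32) to i32
// is rebuilt with the narrower element type as:
//   a = extract_vector_elt (bitcast V to v4i32), 0
//   b = extract_vector_elt (bitcast V to v4i32), 1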
22291
22292SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22293 SDValue VecOp = N->getOperand(0);
22294 SDValue Index = N->getOperand(1);
22295 EVT ScalarVT = N->getValueType(0);
22296 EVT VecVT = VecOp.getValueType();
22297 if (VecOp.isUndef())
22298 return DAG.getUNDEF(ScalarVT);
22299
22300 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22301 //
22302 // This only really matters if the index is non-constant since other combines
22303 // on the constant elements already work.
22304 SDLoc DL(N);
22305 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22306 Index == VecOp.getOperand(2)) {
22307 SDValue Elt = VecOp.getOperand(1);
22308 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22309 }
22310
22311 // (vextract (scalar_to_vector val, 0) -> val
22312 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22313 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22314 if (DAG.isKnownNeverZero(Index))
22315 return DAG.getUNDEF(ScalarVT);
22316
22317 // Check if the result type doesn't match the inserted element type.
22318 // The inserted element and extracted element may have mismatched bitwidth.
22319 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22320 SDValue InOp = VecOp.getOperand(0);
22321 if (InOp.getValueType() != ScalarVT) {
22322 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22323 if (InOp.getValueType().bitsGT(ScalarVT))
22324 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22325 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22326 }
22327 return InOp;
22328 }
22329
22330 // extract_vector_elt of out-of-bounds element -> UNDEF
22331 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22332 if (IndexC && VecVT.isFixedLengthVector() &&
22333 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22334 return DAG.getUNDEF(ScalarVT);
22335
22336 // extract_vector_elt (build_vector x, y), 1 -> y
22337 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22338 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22339 TLI.isTypeLegal(VecVT)) {
22340 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22341 VecVT.isFixedLengthVector()) &&
22342 "BUILD_VECTOR used for scalable vectors");
22343 unsigned IndexVal =
22344 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22345 SDValue Elt = VecOp.getOperand(IndexVal);
22346 EVT InEltVT = Elt.getValueType();
22347
22348 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22349 isNullConstant(Elt)) {
22350 // Sometimes build_vector's scalar input types do not match result type.
22351 if (ScalarVT == InEltVT)
22352 return Elt;
22353
22354 // TODO: It may be useful to truncate if free if the build_vector
22355 // implicitly converts.
22356 }
22357 }
22358
22359 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22360 return BO;
22361
22362 if (VecVT.isScalableVector())
22363 return SDValue();
22364
22365 // All the code from this point onwards assumes fixed width vectors, but it's
22366 // possible that some of the combinations could be made to work for scalable
22367 // vectors too.
22368 unsigned NumElts = VecVT.getVectorNumElements();
22369 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22370
22371 // See if the extracted element is constant, in which case fold it if it's
22372 // a legal fp immediate.
22373 if (IndexC && ScalarVT.isFloatingPoint()) {
22374 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22375 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22376 if (KnownElt.isConstant()) {
22377 APFloat CstFP =
22378 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22379 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22380 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22381 }
22382 }
22383
22384 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22385 // there are regressions on multiple targets without it. We can end up with a
22386 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22387 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22388 VecOp.hasOneUse()) {
22389 // The vector index of the LSBs of the source depends on the endianness.
22390 bool IsLE = DAG.getDataLayout().isLittleEndian();
22391 unsigned ExtractIndex = IndexC->getZExtValue();
22392 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22393 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22394 SDValue BCSrc = VecOp.getOperand(0);
22395 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22396 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22397
22398 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22399 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22400 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22401 // trunc i64 X to i32
22402 SDValue X = BCSrc.getOperand(0);
22403 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22404 "Extract element and scalar to vector can't change element type "
22405 "from FP to integer.");
22406 unsigned XBitWidth = X.getValueSizeInBits();
22407 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22408
22409 // An extract element return value type can be wider than its vector
22410 // operand element type. In that case, the high bits are undefined, so
22411 // it's possible that we may need to extend rather than truncate.
22412 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22413 assert(XBitWidth % VecEltBitWidth == 0 &&
22414 "Scalar bitwidth must be a multiple of vector element bitwidth");
22415 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22416 }
22417 }
22418 }
22419
22420 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22421 // We only perform this optimization before the op legalization phase because
22422 // we may introduce new vector instructions which are not backed by TD
22423 // patterns. For example on AVX, extracting elements from a wide vector
22424 // without using extract_subvector. However, if we can find an underlying
22425 // scalar value, then we can always use that.
22426 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22427 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22428 // Find the new index to extract from.
22429 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22430
22431 // Extracting an undef index is undef.
22432 if (OrigElt == -1)
22433 return DAG.getUNDEF(ScalarVT);
22434
22435 // Select the right vector half to extract from.
22436 SDValue SVInVec;
22437 if (OrigElt < (int)NumElts) {
22438 SVInVec = VecOp.getOperand(0);
22439 } else {
22440 SVInVec = VecOp.getOperand(1);
22441 OrigElt -= NumElts;
22442 }
22443
22444 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22445 SDValue InOp = SVInVec.getOperand(OrigElt);
22446 if (InOp.getValueType() != ScalarVT) {
22447 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22448 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22449 }
22450
22451 return InOp;
22452 }
22453
22454 // FIXME: We should handle recursing on other vector shuffles and
22455 // scalar_to_vector here as well.
22456
22457 if (!LegalOperations ||
22458 // FIXME: Should really be just isOperationLegalOrCustom.
22459 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22460 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT)) {
22461 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22462 DAG.getVectorIdxConstant(OrigElt, DL));
22463 }
22464 }
22465
22466 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22467 // simplify it based on the (valid) extraction indices.
22468 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22469 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22470 Use->getOperand(0) == VecOp &&
22471 isa<ConstantSDNode>(Use->getOperand(1));
22472 })) {
22473 APInt DemandedElts = APInt::getZero(NumElts);
22474 for (SDNode *Use : VecOp->uses()) {
22475 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22476 if (CstElt->getAPIntValue().ult(NumElts))
22477 DemandedElts.setBit(CstElt->getZExtValue());
22478 }
22479 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22480 // We simplified the vector operand of this extract element. If this
22481 // extract is not dead, visit it again so it is folded properly.
22482 if (N->getOpcode() != ISD::DELETED_NODE)
22483 AddToWorklist(N);
22484 return SDValue(N, 0);
22485 }
22486 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22487 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22488 // We simplified the vector operand of this extract element. If this
22489 // extract is not dead, visit it again so it is folded properly.
22490 if (N->getOpcode() != ISD::DELETED_NODE)
22491 AddToWorklist(N);
22492 return SDValue(N, 0);
22493 }
22494 }
22495
22496 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22497 return SDValue(N, 0);
22498
22499 // Everything under here is trying to match an extract of a loaded value.
22500 // If the result of the load has to be truncated, then it's not necessarily
22501 // profitable.
22502 bool BCNumEltsChanged = false;
22503 EVT ExtVT = VecVT.getVectorElementType();
22504 EVT LVT = ExtVT;
22505 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22506 return SDValue();
22507
22508 if (VecOp.getOpcode() == ISD::BITCAST) {
22509 // Don't duplicate a load with other uses.
22510 if (!VecOp.hasOneUse())
22511 return SDValue();
22512
22513 EVT BCVT = VecOp.getOperand(0).getValueType();
22514 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22515 return SDValue();
22516 if (NumElts != BCVT.getVectorNumElements())
22517 BCNumEltsChanged = true;
22518 VecOp = VecOp.getOperand(0);
22519 ExtVT = BCVT.getVectorElementType();
22520 }
22521
22522 // extract (vector load $addr), i --> load $addr + i * size
22523 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22524 ISD::isNormalLoad(VecOp.getNode()) &&
22525 !Index->hasPredecessor(VecOp.getNode())) {
22526 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22527 if (VecLoad && VecLoad->isSimple())
22528 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22529 }
22530
22531 // Perform only after legalization to ensure build_vector / vector_shuffle
22532 // optimizations have already been done.
22533 if (!LegalOperations || !IndexC)
22534 return SDValue();
22535
22536 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22537 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22538 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22539 int Elt = IndexC->getZExtValue();
22540 LoadSDNode *LN0 = nullptr;
22541 if (ISD::isNormalLoad(VecOp.getNode())) {
22542 LN0 = cast<LoadSDNode>(VecOp);
22543 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22544 VecOp.getOperand(0).getValueType() == ExtVT &&
22545 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22546 // Don't duplicate a load with other uses.
22547 if (!VecOp.hasOneUse())
22548 return SDValue();
22549
22550 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22551 }
22552 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22553 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22554 // =>
22555 // (load $addr+1*size)
22556
22557 // Don't duplicate a load with other uses.
22558 if (!VecOp.hasOneUse())
22559 return SDValue();
22560
22561 // If the bit convert changed the number of elements, it is unsafe
22562 // to examine the mask.
22563 if (BCNumEltsChanged)
22564 return SDValue();
22565
22566 // Select the input vector, guarding against an out-of-range extract index.
22567 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22568 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22569
22570 if (VecOp.getOpcode() == ISD::BITCAST) {
22571 // Don't duplicate a load with other uses.
22572 if (!VecOp.hasOneUse())
22573 return SDValue();
22574
22575 VecOp = VecOp.getOperand(0);
22576 }
22577 if (ISD::isNormalLoad(VecOp.getNode())) {
22578 LN0 = cast<LoadSDNode>(VecOp);
22579 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22580 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22581 }
22582 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22583 VecVT.getVectorElementType() == ScalarVT &&
22584 (!LegalTypes ||
22585 TLI.isTypeLegal(
22586 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22587 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22588 // -> extract_vector_elt a, 0
22589 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22590 // -> extract_vector_elt a, 1
22591 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22592 // -> extract_vector_elt b, 0
22593 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22594 // -> extract_vector_elt b, 1
22595 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22596 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22597 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22598 Index.getValueType());
22599
22600 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22601 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22602 ConcatVT.getVectorElementType(),
22603 ConcatOp, NewIdx);
22604 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22605 }
22606
22607 // Make sure we found a simple (non-volatile, non-atomic) load and the
22608 // extractelement is the only use.
22609 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22610 return SDValue();
22611
22612 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22613 if (Elt == -1)
22614 return DAG.getUNDEF(LVT);
22615
22616 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22617}
22618
22619// Simplify (build_vec (ext )) to (bitcast (build_vec ))
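// Illustrative sketch on a little-endian target (hypothetical types):
//   (v2i64 build_vector (i64 zero_extend i32:a), (i64 zero_extend i32:b))
//   --> (v2i64 bitcast (v4i32 build_vector a, 0, b, 0))
// With any_extend inputs the filler elements are undef instead of zero.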
22620SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22621 // We perform this optimization post type-legalization because
22622 // the type-legalizer often scalarizes integer-promoted vectors.
22623 // Performing this optimization before may create bit-casts which
22624 // will be type-legalized to complex code sequences.
22625 // We perform this optimization only before the operation legalizer because we
22626 // may introduce illegal operations.
22627 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22628 return SDValue();
22629
22630 unsigned NumInScalars = N->getNumOperands();
22631 SDLoc DL(N);
22632 EVT VT = N->getValueType(0);
22633
22634 // Check to see if this is a BUILD_VECTOR of a bunch of values
22635 // which come from any_extend or zero_extend nodes. If so, we can create
22636 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22637 // optimizations. We do not handle sign-extend because we can't fill the sign
22638 // using shuffles.
22639 EVT SourceType = MVT::Other;
22640 bool AllAnyExt = true;
22641
22642 for (unsigned i = 0; i != NumInScalars; ++i) {
22643 SDValue In = N->getOperand(i);
22644 // Ignore undef inputs.
22645 if (In.isUndef()) continue;
22646
22647 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22648 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22649
22650 // Abort if the element is not an extension.
22651 if (!ZeroExt && !AnyExt) {
22652 SourceType = MVT::Other;
22653 break;
22654 }
22655
22656 // The input is a ZeroExt or AnyExt. Check the original type.
22657 EVT InTy = In.getOperand(0).getValueType();
22658
22659 // Check that all of the widened source types are the same.
22660 if (SourceType == MVT::Other)
22661 // First time.
22662 SourceType = InTy;
22663 else if (InTy != SourceType) {
22664 // Multiple incoming types. Abort.
22665 SourceType = MVT::Other;
22666 break;
22667 }
22668
22669 // Check if all of the extends are ANY_EXTENDs.
22670 AllAnyExt &= AnyExt;
22671 }
22672
22673 // In order to have valid types, all of the inputs must be extended from the
22674 // same source type and all of the inputs must be any or zero extend.
22675 // Scalar sizes must be a power of two.
22676 EVT OutScalarTy = VT.getScalarType();
22677 bool ValidTypes =
22678 SourceType != MVT::Other &&
22679 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22680 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22681
22682 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22683 // turn into a single shuffle instruction.
22684 if (!ValidTypes)
22685 return SDValue();
22686
22687 // If we already have a splat buildvector, then don't fold it if it means
22688 // introducing zeros.
22689 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22690 return SDValue();
22691
22692 bool isLE = DAG.getDataLayout().isLittleEndian();
22693 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22694 assert(ElemRatio > 1 && "Invalid element size ratio");
22695 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22696 DAG.getConstant(0, DL, SourceType);
22697
22698 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22699 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22700
22701 // Populate the new build_vector
22702 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22703 SDValue Cast = N->getOperand(i);
22704 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22705 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22706 Cast.isUndef()) && "Invalid cast opcode");
22707 SDValue In;
22708 if (Cast.isUndef())
22709 In = DAG.getUNDEF(SourceType);
22710 else
22711 In = Cast->getOperand(0);
22712 unsigned Index = isLE ? (i * ElemRatio) :
22713 (i * ElemRatio + (ElemRatio - 1));
22714
22715 assert(Index < Ops.size() && "Invalid index");
22716 Ops[Index] = In;
22717 }
22718
22719 // The type of the new BUILD_VECTOR node.
22720 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22721 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22722 "Invalid vector size");
22723 // Check if the new vector type is legal.
22724 if (!isTypeLegal(VecVT) ||
22725 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22726 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22727 return SDValue();
22728
22729 // Make the new BUILD_VECTOR.
22730 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22731
22732 // The new BUILD_VECTOR node has the potential to be further optimized.
22733 AddToWorklist(BV.getNode());
22734 // Bitcast to the desired type.
22735 return DAG.getBitcast(VT, BV);
22736}
22737
22738// Simplify (build_vec (trunc $1)
22739// (trunc (srl $1 half-width))
22740// (trunc (srl $1 (2 * half-width))))
22741// to (bitcast $1)
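// Illustrative sketch (hypothetical types, little-endian only):
//   (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//                       (trunc (srl x, 32)), (trunc (srl x, 48)))
//   --> (v4i16 bitcast i64:x)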
22742SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22743 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22744
22745 EVT VT = N->getValueType(0);
22746
22747 // Don't run this before LegalizeTypes if VT is legal.
22748 // Targets may have other preferences.
22749 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22750 return SDValue();
22751
22752 // Only for little endian
22753 if (!DAG.getDataLayout().isLittleEndian())
22754 return SDValue();
22755
22756 SDLoc DL(N);
22757 EVT OutScalarTy = VT.getScalarType();
22758 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22759
22760 // Only for power of two types to be sure that bitcast works well
22761 if (!isPowerOf2_64(ScalarTypeBitsize))
22762 return SDValue();
22763
22764 unsigned NumInScalars = N->getNumOperands();
22765
22766 // Look through bitcasts
22767 auto PeekThroughBitcast = [](SDValue Op) {
22768 if (Op.getOpcode() == ISD::BITCAST)
22769 return Op.getOperand(0);
22770 return Op;
22771 };
22772
22773 // The source value where all the parts are extracted.
22774 SDValue Src;
22775 for (unsigned i = 0; i != NumInScalars; ++i) {
22776 SDValue In = PeekThroughBitcast(N->getOperand(i));
22777 // Ignore undef inputs.
22778 if (In.isUndef()) continue;
22779
22780 if (In.getOpcode() != ISD::TRUNCATE)
22781 return SDValue();
22782
22783 In = PeekThroughBitcast(In.getOperand(0));
22784
22785 if (In.getOpcode() != ISD::SRL) {
22786 // For now only build_vec without shuffling, handle shifts here in the
22787 // future.
22788 if (i != 0)
22789 return SDValue();
22790
22791 Src = In;
22792 } else {
22793 // In is SRL
22794 SDValue part = PeekThroughBitcast(In.getOperand(0));
22795
22796 if (!Src) {
22797 Src = part;
22798 } else if (Src != part) {
22799 // Vector parts do not stem from the same variable
22800 return SDValue();
22801 }
22802
22803 SDValue ShiftAmtVal = In.getOperand(1);
22804 if (!isa<ConstantSDNode>(ShiftAmtVal))
22805 return SDValue();
22806
22807 uint64_t ShiftAmt = In.getConstantOperandVal(1);
22808
22809 // The extracted value is not extracted at the right position
22810 if (ShiftAmt != i * ScalarTypeBitsize)
22811 return SDValue();
22812 }
22813 }
22814
22815 // Only cast if the size is the same
22816 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22817 return SDValue();
22818
22819 return DAG.getBitcast(VT, Src);
22820}
22821
22822SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22823 ArrayRef<int> VectorMask,
22824 SDValue VecIn1, SDValue VecIn2,
22825 unsigned LeftIdx, bool DidSplitVec) {
22826 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22827
22828 EVT VT = N->getValueType(0);
22829 EVT InVT1 = VecIn1.getValueType();
22830 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22831
22832 unsigned NumElems = VT.getVectorNumElements();
22833 unsigned ShuffleNumElems = NumElems;
22834
22835 // If we artificially split a vector in two already, then the offsets in the
22836 // operands will all be based off of VecIn1, even those in VecIn2.
22837 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22838
22839 uint64_t VTSize = VT.getFixedSizeInBits();
22840 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22841 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22842
22843 assert(InVT2Size <= InVT1Size &&
22844 "Inputs must be sorted to be in non-increasing vector size order.");
22845
22846 // We can't generate a shuffle node with mismatched input and output types.
22847 // Try to make the types match the type of the output.
22848 if (InVT1 != VT || InVT2 != VT) {
22849 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22850 // If the output vector length is a multiple of both input lengths,
22851 // we can concatenate them and pad the rest with undefs.
22852 unsigned NumConcats = VTSize / InVT1Size;
22853 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22854 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22855 ConcatOps[0] = VecIn1;
22856 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22857 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22858 VecIn2 = SDValue();
22859 } else if (InVT1Size == VTSize * 2) {
22860 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22861 return SDValue();
22862
22863 if (!VecIn2.getNode()) {
22864 // If we only have one input vector, and it's twice the size of the
22865 // output, split it in two.
22866 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22867 DAG.getVectorIdxConstant(NumElems, DL));
22868 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22869 // Since we now have shorter input vectors, adjust the offset of the
22870 // second vector's start.
22871 Vec2Offset = NumElems;
22872 } else {
22873 assert(InVT2Size <= InVT1Size &&
22874 "Second input is not going to be larger than the first one.");
22875
22876 // VecIn1 is wider than the output, and we have another, possibly
22877 // smaller input. Pad the smaller input with undefs, shuffle at the
22878 // input vector width, and extract the output.
22879 // The shuffle type is different than VT, so check legality again.
22880 if (LegalOperations &&
22881 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22882 return SDValue();
22883
22884 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22885 // lower it back into a BUILD_VECTOR. So if the inserted type is
22886 // illegal, don't even try.
22887 if (InVT1 != InVT2) {
22888 if (!TLI.isTypeLegal(InVT2))
22889 return SDValue();
22890 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22891 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22892 }
22893 ShuffleNumElems = NumElems * 2;
22894 }
22895 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22896 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22897 ConcatOps[0] = VecIn2;
22898 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22899 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22900 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22901 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22902 return SDValue();
22903 // If the dest vector has fewer than two elements, then using a shuffle and
22904 // extracting from larger registers will cost even more.
22905 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22906 return SDValue();
22907 assert(InVT2Size <= InVT1Size &&
22908 "Second input is not going to be larger than the first one.");
22909
22910 // VecIn1 is wider than the output, and we have another, possibly
22911 // smaller input. Pad the smaller input with undefs, shuffle at the
22912 // input vector width, and extract the output.
22913 // The shuffle type is different than VT, so check legality again.
22914 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22915 return SDValue();
22916
22917 if (InVT1 != InVT2) {
22918 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22919 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22920 }
22921 ShuffleNumElems = InVT1Size / VTSize * NumElems;
22922 } else {
22923 // TODO: Support cases where the length mismatch isn't exactly by a
22924 // factor of 2.
22925 // TODO: Move this check upwards, so that if we have bad type
22926 // mismatches, we don't create any DAG nodes.
22927 return SDValue();
22928 }
22929 }
22930
22931 // Initialize mask to undef.
22932 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22933
22934 // Only need to run up to the number of elements actually used, not the
22935 // total number of elements in the shuffle - if we are shuffling a wider
22936 // vector, the high lanes should be set to undef.
22937 for (unsigned i = 0; i != NumElems; ++i) {
22938 if (VectorMask[i] <= 0)
22939 continue;
22940
22941 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22942 if (VectorMask[i] == (int)LeftIdx) {
22943 Mask[i] = ExtIndex;
22944 } else if (VectorMask[i] == (int)LeftIdx + 1) {
22945 Mask[i] = Vec2Offset + ExtIndex;
22946 }
22947 }
22948
22949 // The type the input vectors may have changed above.
22950 InVT1 = VecIn1.getValueType();
22951
22952 // If we already have a VecIn2, it should have the same type as VecIn1.
22953 // If we don't, get an undef/zero vector of the appropriate type.
22954 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22955 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22956
22957 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22958 if (ShuffleNumElems > NumElems)
22959 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22960
22961 return Shuffle;
22962}
22963
22964 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22965 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22966
22967 // First, determine where the build vector is not undef.
22968 // TODO: We could extend this to handle zero elements as well as undefs.
22969 int NumBVOps = BV->getNumOperands();
22970 int ZextElt = -1;
22971 for (int i = 0; i != NumBVOps; ++i) {
22972 SDValue Op = BV->getOperand(i);
22973 if (Op.isUndef())
22974 continue;
22975 if (ZextElt == -1)
22976 ZextElt = i;
22977 else
22978 return SDValue();
22979 }
22980 // Bail out if there's no non-undef element.
22981 if (ZextElt == -1)
22982 return SDValue();
22983
22984 // The build vector contains some number of undef elements and exactly
22985 // one other element. That other element must be a zero-extended scalar
22986 // extracted from a vector at a constant index to turn this into a shuffle.
22987 // Also, require that the build vector does not implicitly truncate/extend
22988 // its elements.
22989 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22990 EVT VT = BV->getValueType(0);
22991 SDValue Zext = BV->getOperand(ZextElt);
22992   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22993       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22994       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22995       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22996     return SDValue();
22997
22998 // The zero-extend must be a multiple of the source size, and we must be
22999 // building a vector of the same size as the source of the extract element.
23000 SDValue Extract = Zext.getOperand(0);
23001 unsigned DestSize = Zext.getValueSizeInBits();
23002 unsigned SrcSize = Extract.getValueSizeInBits();
23003 if (DestSize % SrcSize != 0 ||
23004 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23005 return SDValue();
23006
23007 // Create a shuffle mask that will combine the extracted element with zeros
23008 // and undefs.
23009 int ZextRatio = DestSize / SrcSize;
23010 int NumMaskElts = NumBVOps * ZextRatio;
23011 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23012 for (int i = 0; i != NumMaskElts; ++i) {
23013 if (i / ZextRatio == ZextElt) {
23014 // The low bits of the (potentially translated) extracted element map to
23015 // the source vector. The high bits map to zero. We will use a zero vector
23016 // as the 2nd source operand of the shuffle, so use the 1st element of
23017 // that vector (mask value is number-of-elements) for the high bits.
23018 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23019 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23020 : NumMaskElts;
23021 }
23022
23023 // Undef elements of the build vector remain undef because we initialize
23024 // the shuffle mask with -1.
23025 }
23026
23027 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23028 // bitcast (shuffle V, ZeroVec, VectorMask)
23029 SDLoc DL(BV);
23030 EVT VecVT = Extract.getOperand(0).getValueType();
23031 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23032 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23033 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23034 ZeroVec, ShufMask, DAG);
23035 if (!Shuf)
23036 return SDValue();
23037 return DAG.getBitcast(VT, Shuf);
23038}
23039
23040// FIXME: promote to STLExtras.
23041template <typename R, typename T>
23042static auto getFirstIndexOf(R &&Range, const T &Val) {
23043 auto I = find(Range, Val);
23044 if (I == Range.end())
23045 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23046 return std::distance(Range.begin(), I);
23047}
23048
23049// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23050// operations. If the types of the vectors we're extracting from allow it,
23051// turn this into a vector_shuffle node.
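// Illustrative sketch (hypothetical types):
//   (v4i32 build_vector (extract_elt v4i32:A, 0), (extract_elt v4i32:B, 0),
//                       (extract_elt v4i32:A, 1), undef)
//   --> (v4i32 vector_shuffle<0,4,1,u> A, B)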
23052SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23053 SDLoc DL(N);
23054 EVT VT = N->getValueType(0);
23055
23056 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23057 if (!isTypeLegal(VT))
23058 return SDValue();
23059
23060 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23061 return V;
23062
23063 // May only combine to shuffle after legalize if shuffle is legal.
23064 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23065 return SDValue();
23066
23067 bool UsesZeroVector = false;
23068 unsigned NumElems = N->getNumOperands();
23069
23070 // Record, for each element of the newly built vector, which input vector
23071 // that element comes from. -1 stands for undef, 0 for the zero vector,
23072 // and positive values for the input vectors.
23073 // VectorMask maps each element to its vector number, and VecIn maps vector
23074 // numbers to their initial SDValues.
23075
23076 SmallVector<int, 8> VectorMask(NumElems, -1);
23077 SmallVector<SDValue, 8> VecIn;
23078 VecIn.push_back(SDValue());
23079
23080 for (unsigned i = 0; i != NumElems; ++i) {
23081 SDValue Op = N->getOperand(i);
23082
23083 if (Op.isUndef())
23084 continue;
23085
23086 // See if we can use a blend with a zero vector.
23087 // TODO: Should we generalize this to a blend with an arbitrary constant
23088 // vector?
23089 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23090 UsesZeroVector = true;
23091 VectorMask[i] = 0;
23092 continue;
23093 }
23094
23095 // Not an undef or zero. If the input is something other than an
23096 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23097 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23098 !isa<ConstantSDNode>(Op.getOperand(1)))
23099 return SDValue();
23100 SDValue ExtractedFromVec = Op.getOperand(0);
23101
23102 if (ExtractedFromVec.getValueType().isScalableVector())
23103 return SDValue();
23104
23105 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23106 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23107 return SDValue();
23108
23109 // All inputs must have the same element type as the output.
23110 if (VT.getVectorElementType() !=
23111 ExtractedFromVec.getValueType().getVectorElementType())
23112 return SDValue();
23113
23114 // Have we seen this input vector before?
23115 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23116 // a map back from SDValues to numbers isn't worth it.
23117 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23118 if (Idx == -1) { // A new source vector?
23119 Idx = VecIn.size();
23120 VecIn.push_back(ExtractedFromVec);
23121 }
23122
23123 VectorMask[i] = Idx;
23124 }
23125
23126 // If we didn't find at least one input vector, bail out.
23127 if (VecIn.size() < 2)
23128 return SDValue();
23129
23130 // If all the operands of BUILD_VECTOR extract from the same
23131 // vector, then split the vector efficiently based on the maximum
23132 // vector access index and adjust the VectorMask and
23133 // VecIn accordingly.
23134 bool DidSplitVec = false;
23135 if (VecIn.size() == 2) {
23136 unsigned MaxIndex = 0;
23137 unsigned NearestPow2 = 0;
23138 SDValue Vec = VecIn.back();
23139 EVT InVT = Vec.getValueType();
23140 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23141
23142 for (unsigned i = 0; i < NumElems; i++) {
23143 if (VectorMask[i] <= 0)
23144 continue;
23145 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23146 IndexVec[i] = Index;
23147 MaxIndex = std::max(MaxIndex, Index);
23148 }
23149
23150 NearestPow2 = PowerOf2Ceil(MaxIndex);
23151 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23152 NumElems * 2 < NearestPow2) {
23153 unsigned SplitSize = NearestPow2 / 2;
23154 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23155 InVT.getVectorElementType(), SplitSize);
23156 if (TLI.isTypeLegal(SplitVT) &&
23157 SplitSize + SplitVT.getVectorNumElements() <=
23158 InVT.getVectorNumElements()) {
23159 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23160 DAG.getVectorIdxConstant(SplitSize, DL));
23161 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23162 DAG.getVectorIdxConstant(0, DL));
23163 VecIn.pop_back();
23164 VecIn.push_back(VecIn1);
23165 VecIn.push_back(VecIn2);
23166 DidSplitVec = true;
23167
23168 for (unsigned i = 0; i < NumElems; i++) {
23169 if (VectorMask[i] <= 0)
23170 continue;
23171 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23172 }
23173 }
23174 }
23175 }
23176
23177 // Sort input vectors by decreasing vector element count,
23178 // while preserving the relative order of equally-sized vectors.
23179 // Note that we keep the first "implicit" zero vector as-is.
23180 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23181 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23182 [](const SDValue &a, const SDValue &b) {
23183 return a.getValueType().getVectorNumElements() >
23184 b.getValueType().getVectorNumElements();
23185 });
23186
23187 // We now also need to rebuild the VectorMask, because it referenced element
23188 // order in VecIn, and we just sorted them.
23189 for (int &SourceVectorIndex : VectorMask) {
23190 if (SourceVectorIndex <= 0)
23191 continue;
23192 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23193 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23194 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23195 SourceVectorIndex = Idx;
23196 }
23197
23198 VecIn = std::move(SortedVecIn);
23199
23200 // TODO: Should this fire if some of the input vectors has illegal type (like
23201 // it does now), or should we let legalization run its course first?
23202
23203 // Shuffle phase:
23204 // Take pairs of vectors, and shuffle them so that the result has elements
23205 // from these vectors in the correct places.
23206 // For example, given:
23207 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23208 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23209 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23210 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23211 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23212 // We will generate:
23213 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23214 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23215 SmallVector<SDValue, 4> Shuffles;
23216 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23217 unsigned LeftIdx = 2 * In + 1;
23218 SDValue VecLeft = VecIn[LeftIdx];
23219 SDValue VecRight =
23220 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23221
23222 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23223 VecRight, LeftIdx, DidSplitVec))
23224 Shuffles.push_back(Shuffle);
23225 else
23226 return SDValue();
23227 }
23228
23229 // If we need the zero vector as an "ingredient" in the blend tree, add it
23230 // to the list of shuffles.
23231 if (UsesZeroVector)
23232 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23233 : DAG.getConstantFP(0.0, DL, VT));
23234
23235 // If we only have one shuffle, we're done.
23236 if (Shuffles.size() == 1)
23237 return Shuffles[0];
23238
23239 // Update the vector mask to point to the post-shuffle vectors.
23240 for (int &Vec : VectorMask)
23241 if (Vec == 0)
23242 Vec = Shuffles.size() - 1;
23243 else
23244 Vec = (Vec - 1) / 2;
23245
23246 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23247 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23248 // generate:
23249 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23250 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23251 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23252 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23253 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23254 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23255 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23256
23257 // Make sure the initial size of the shuffle list is even.
23258 if (Shuffles.size() % 2)
23259 Shuffles.push_back(DAG.getUNDEF(VT));
23260
23261 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23262 if (CurSize % 2) {
23263 Shuffles[CurSize] = DAG.getUNDEF(VT);
23264 CurSize++;
23265 }
23266 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23267 int Left = 2 * In;
23268 int Right = 2 * In + 1;
23269 SmallVector<int, 8> Mask(NumElems, -1);
23270 SDValue L = Shuffles[Left];
23271 ArrayRef<int> LMask;
23272 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23273 L.use_empty() && L.getOperand(1).isUndef() &&
23274 L.getOperand(0).getValueType() == L.getValueType();
23275 if (IsLeftShuffle) {
23276 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23277 L = L.getOperand(0);
23278 }
23279 SDValue R = Shuffles[Right];
23280 ArrayRef<int> RMask;
23281 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23282 R.use_empty() && R.getOperand(1).isUndef() &&
23283 R.getOperand(0).getValueType() == R.getValueType();
23284 if (IsRightShuffle) {
23285 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23286 R = R.getOperand(0);
23287 }
23288 for (unsigned I = 0; I != NumElems; ++I) {
23289 if (VectorMask[I] == Left) {
23290 Mask[I] = I;
23291 if (IsLeftShuffle)
23292 Mask[I] = LMask[I];
23293 VectorMask[I] = In;
23294 } else if (VectorMask[I] == Right) {
23295 Mask[I] = I + NumElems;
23296 if (IsRightShuffle)
23297 Mask[I] = RMask[I] + NumElems;
23298 VectorMask[I] = In;
23299 }
23300 }
23301
23302 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23303 }
23304 }
23305 return Shuffles[0];
23306}
23307
23308// Try to turn a build vector of zero extends of extract vector elts into a
23309 // vector zero extend and possibly an extract subvector.
23310// TODO: Support sign extend?
23311// TODO: Allow undef elements?
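// Illustrative sketch (hypothetical types; the offset must be a multiple of
// the result's element count):
//   (v2i64 build_vector (zero_extend (extract_elt v4i32:X, 2)),
//                       (zero_extend (extract_elt v4i32:X, 3)))
//   --> (v2i64 zero_extend (v2i32 extract_subvector X, 2))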
23312SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23313 if (LegalOperations)
23314 return SDValue();
23315
23316 EVT VT = N->getValueType(0);
23317
23318 bool FoundZeroExtend = false;
23319 SDValue Op0 = N->getOperand(0);
23320 auto checkElem = [&](SDValue Op) -> int64_t {
23321 unsigned Opc = Op.getOpcode();
23322 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23323 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23324 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23325 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23326 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23327 return C->getZExtValue();
23328 return -1;
23329 };
23330
23331 // Make sure the first element matches
23332 // (zext (extract_vector_elt X, C))
23333 // Offset must be a constant multiple of the
23334 // known-minimum vector length of the result type.
23335 int64_t Offset = checkElem(Op0);
23336 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23337 return SDValue();
23338
23339 unsigned NumElems = N->getNumOperands();
23340 SDValue In = Op0.getOperand(0).getOperand(0);
23341 EVT InSVT = In.getValueType().getScalarType();
23342 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23343
23344 // Don't create an illegal input type after type legalization.
23345 if (LegalTypes && !TLI.isTypeLegal(InVT))
23346 return SDValue();
23347
23348 // Ensure all the elements come from the same vector and are adjacent.
23349 for (unsigned i = 1; i != NumElems; ++i) {
23350 if ((Offset + i) != checkElem(N->getOperand(i)))
23351 return SDValue();
23352 }
23353
23354 SDLoc DL(N);
23355 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23356 Op0.getOperand(0).getOperand(1));
23357 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23358 VT, In);
23359}
23360
23361 // If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
23362 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
23363 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23364 // This pattern can appear during legalization.
23365 //
23366 // NOTE: This can be generalized to allow more than a single
23367 // non-constant-zero op, UNDEF's, and to be KnownBits-based.
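// Illustrative sketch (hypothetical types, little-endian):
//   (v2i64 build_vector (i64 zero_extend i32:x), (i64 0))
//   --> (v2i64 bitcast (v4i32 build_vector (i32 trunc (i64 zero_extend x)),
//                                          0, 0, 0))
// Later combines are expected to fold the trunc of the zero_extend.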
23368SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23369 // Don't run this after legalization. Targets may have other preferences.
23370 if (Level >= AfterLegalizeDAG)
23371 return SDValue();
23372
23373 // FIXME: support big-endian.
23374 if (DAG.getDataLayout().isBigEndian())
23375 return SDValue();
23376
23377 EVT VT = N->getValueType(0);
23378 EVT OpVT = N->getOperand(0).getValueType();
23379 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23380
23381 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23382
23383 if (!TLI.isTypeLegal(OpIntVT) ||
23384 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23385 return SDValue();
23386
23387 unsigned EltBitwidth = VT.getScalarSizeInBits();
23388 // NOTE: the actual width of operands may be wider than that!
23389
23390 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23391 // active bits they all have? We'll want to truncate them all to that width.
23392 unsigned ActiveBits = 0;
23393 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23394 for (auto I : enumerate(N->ops())) {
23395 SDValue Op = I.value();
23396 // FIXME: support UNDEF elements?
23397 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23398 unsigned OpActiveBits =
23399 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23400 if (OpActiveBits == 0) {
23401 KnownZeroOps.setBit(I.index());
23402 continue;
23403 }
23404 // Profitability check: don't allow non-zero constant operands.
23405 return SDValue();
23406 }
23407 // Profitability check: there must only be a single non-zero operand,
23408 // and it must be the first operand of the BUILD_VECTOR.
23409 if (I.index() != 0)
23410 return SDValue();
23411 // The operand must be a zero-extension itself.
23412 // FIXME: this could be generalized to known leading zeros check.
23413 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23414 return SDValue();
23415 unsigned CurrActiveBits =
23416 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23417 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23418 ActiveBits = CurrActiveBits;
23419 // We want to at least halve the element size.
23420 if (2 * ActiveBits > EltBitwidth)
23421 return SDValue();
23422 }
23423
23424 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23425 if (ActiveBits == 0)
23426 return SDValue();
23427
23428 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
23429 // into how many chunks can we split our element width?
23430 EVT NewScalarIntVT, NewIntVT;
23431 std::optional<unsigned> Factor;
23432 // We can split the element into at least two chunks, but not into more
23433 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23434 // such that the element width is a multiple of it,
23435 // and the resulting types/operations on that chunk width are legal.
23436 assert(2 * ActiveBits <= EltBitwidth &&
23437 "We know that half or less bits of the element are active.");
23438 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23439 if (EltBitwidth % Scale != 0)
23440 continue;
23441 unsigned ChunkBitwidth = EltBitwidth / Scale;
23442 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23443 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23444 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23445 Scale * N->getNumOperands());
23446 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23447 (LegalOperations &&
23448 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23449 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23450 continue;
23451 Factor = Scale;
23452 break;
23453 }
23454 if (!Factor)
23455 return SDValue();
23456
23457 SDLoc DL(N);
23458 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23459
23460 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23461 SmallVector<SDValue, 16> NewOps;
23462 NewOps.reserve(NewIntVT.getVectorNumElements());
23463 for (auto I : enumerate(N->ops())) {
23464 SDValue Op = I.value();
23465 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23466 unsigned SrcOpIdx = I.index();
23467 if (KnownZeroOps[SrcOpIdx]) {
23468 NewOps.append(*Factor, ZeroOp);
23469 continue;
23470 }
23471 Op = DAG.getBitcast(OpIntVT, Op);
23472 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23473 NewOps.emplace_back(Op);
23474 NewOps.append(*Factor - 1, ZeroOp);
23475 }
23476 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23477 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23478 NewBV = DAG.getBitcast(VT, NewBV);
23479 return NewBV;
23480}
23481
23482SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23483 EVT VT = N->getValueType(0);
23484
23485 // A vector built entirely of undefs is undef.
23486 if (ISD::allOperandsUndef(N))
23487 return DAG.getUNDEF(VT);
23488
23489 // If this is a splat of a bitcast from another vector, change to a
23490 // concat_vector.
23491 // For example:
23492 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23493 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23494 //
23495 // If X is a build_vector itself, the concat can become a larger build_vector.
23496 // TODO: Maybe this is useful for non-splat too?
23497 if (!LegalOperations) {
23498 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23499 // Only change build_vector to a concat_vector if the splat value type is
23500 // same as the vector element type.
23501 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23502 Splat = peekThroughBitcasts(Splat);
23503 EVT SrcVT = Splat.getValueType();
23504 if (SrcVT.isVector()) {
23505 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23506 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23507 SrcVT.getVectorElementType(), NumElts);
23508 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23509 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23510 SDValue Concat =
23511 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23512 return DAG.getBitcast(VT, Concat);
23513 }
23514 }
23515 }
23516 }
23517
23518 // Check if we can express BUILD VECTOR via subvector extract.
23519 if (!LegalTypes && (N->getNumOperands() > 1)) {
23520 SDValue Op0 = N->getOperand(0);
23521 auto checkElem = [&](SDValue Op) -> uint64_t {
23522 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23523 (Op0.getOperand(0) == Op.getOperand(0)))
23524 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23525 return CNode->getZExtValue();
23526 return -1;
23527 };
23528
23529 int Offset = checkElem(Op0);
23530 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23531 if (Offset + i != checkElem(N->getOperand(i))) {
23532 Offset = -1;
23533 break;
23534 }
23535 }
23536
23537 if ((Offset == 0) &&
23538 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23539 return Op0.getOperand(0);
23540 if ((Offset != -1) &&
23541 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23542 0)) // IDX must be multiple of output size.
23543 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23544 Op0.getOperand(0), Op0.getOperand(1));
23545 }
23546
23547 if (SDValue V = convertBuildVecZextToZext(N))
23548 return V;
23549
23550 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23551 return V;
23552
23553 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23554 return V;
23555
23556 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23557 return V;
23558
23559 if (SDValue V = reduceBuildVecToShuffle(N))
23560 return V;
23561
23562 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23563 // Do this late as some of the above may replace the splat.
23564 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23565 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23566 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23567 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23568 }
23569
23570 return SDValue();
23571}
23572
23573 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23574 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23575 EVT OpVT = N->getOperand(0).getValueType();
23576
23577 // If the operands are legal vectors, leave them alone.
23578 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23579 return SDValue();
23580
23581 SDLoc DL(N);
23582 EVT VT = N->getValueType(0);
23583 SmallVector<SDValue, 8> Ops;
23584 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23585
23586 // Keep track of what we encounter.
23587 bool AnyInteger = false;
23588 bool AnyFP = false;
23589 for (const SDValue &Op : N->ops()) {
23590 if (ISD::BITCAST == Op.getOpcode() &&
23591 !Op.getOperand(0).getValueType().isVector())
23592 Ops.push_back(Op.getOperand(0));
23593 else if (ISD::UNDEF == Op.getOpcode())
23594 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23595 else
23596 return SDValue();
23597
23598 // Note whether we encounter an integer or floating point scalar.
23599 // If it's neither, bail out, it could be something weird like x86mmx.
23600 EVT LastOpVT = Ops.back().getValueType();
23601 if (LastOpVT.isFloatingPoint())
23602 AnyFP = true;
23603 else if (LastOpVT.isInteger())
23604 AnyInteger = true;
23605 else
23606 return SDValue();
23607 }
23608
23609 // If any of the operands is a floating point scalar bitcast to a vector,
23610 // use floating point types throughout, and bitcast everything.
23611 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23612 if (AnyFP) {
23613 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23614 if (AnyInteger) {
23615 for (SDValue &Op : Ops) {
23616 if (Op.getValueType() == SVT)
23617 continue;
23618 if (Op.isUndef())
23619 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23620 else
23621 Op = DAG.getBitcast(SVT, Op);
23622 }
23623 }
23624 }
23625
23626 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23627 VT.getSizeInBits() / SVT.getSizeInBits());
23628 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23629}
23630
23631// Attempt to merge nested concat_vectors/undefs.
23632// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23633// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23635 SelectionDAG &DAG) {
23636 EVT VT = N->getValueType(0);
23637
23638 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23639 EVT SubVT;
23640 SDValue FirstConcat;
23641 for (const SDValue &Op : N->ops()) {
23642 if (Op.isUndef())
23643 continue;
23644 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23645 return SDValue();
23646 if (!FirstConcat) {
23647 SubVT = Op.getOperand(0).getValueType();
23648 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23649 return SDValue();
23650 FirstConcat = Op;
23651 continue;
23652 }
23653 if (SubVT != Op.getOperand(0).getValueType())
23654 return SDValue();
23655 }
23656 assert(FirstConcat && "Concat of all-undefs found");
23657
23658 SmallVector<SDValue> ConcatOps;
23659 for (const SDValue &Op : N->ops()) {
23660 if (Op.isUndef()) {
23661 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23662 continue;
23663 }
23664 ConcatOps.append(Op->op_begin(), Op->op_end());
23665 }
23666 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23667}
23668
23669// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23670// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23671// most two distinct vectors the same size as the result, attempt to turn this
23672// into a legal shuffle.
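// Illustrative sketch (hypothetical types):
//   (v4i32 concat_vectors (v2i32 extract_subvector v4i32:A, 2),
//                         (v2i32 extract_subvector v4i32:B, 0))
//   --> (v4i32 vector_shuffle<2,3,4,5> A, B)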
23673 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23674 EVT VT = N->getValueType(0);
23675 EVT OpVT = N->getOperand(0).getValueType();
23676
23677 // We currently can't generate an appropriate shuffle for a scalable vector.
23678 if (VT.isScalableVector())
23679 return SDValue();
23680
23681 int NumElts = VT.getVectorNumElements();
23682 int NumOpElts = OpVT.getVectorNumElements();
23683
23684 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23685 SmallVector<int, 8> Mask;
23686
23687 for (SDValue Op : N->ops()) {
23688 Op = peekThroughBitcasts(Op);
23689
23690 // UNDEF nodes convert to UNDEF shuffle mask values.
23691 if (Op.isUndef()) {
23692 Mask.append((unsigned)NumOpElts, -1);
23693 continue;
23694 }
23695
23696 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23697 return SDValue();
23698
23699 // What vector are we extracting the subvector from and at what index?
23700 SDValue ExtVec = Op.getOperand(0);
23701 int ExtIdx = Op.getConstantOperandVal(1);
23702
23703 // We want the EVT of the original extraction to correctly scale the
23704 // extraction index.
23705 EVT ExtVT = ExtVec.getValueType();
23706 ExtVec = peekThroughBitcasts(ExtVec);
23707
23708 // UNDEF nodes convert to UNDEF shuffle mask values.
23709 if (ExtVec.isUndef()) {
23710 Mask.append((unsigned)NumOpElts, -1);
23711 continue;
23712 }
23713
23714 // Ensure that we are extracting a subvector from a vector the same
23715 // size as the result.
23716 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23717 return SDValue();
23718
23719 // Scale the subvector index to account for any bitcast.
23720 int NumExtElts = ExtVT.getVectorNumElements();
23721 if (0 == (NumExtElts % NumElts))
23722 ExtIdx /= (NumExtElts / NumElts);
23723 else if (0 == (NumElts % NumExtElts))
23724 ExtIdx *= (NumElts / NumExtElts);
23725 else
23726 return SDValue();
23727
23728 // At most we can reference 2 inputs in the final shuffle.
23729 if (SV0.isUndef() || SV0 == ExtVec) {
23730 SV0 = ExtVec;
23731 for (int i = 0; i != NumOpElts; ++i)
23732 Mask.push_back(i + ExtIdx);
23733 } else if (SV1.isUndef() || SV1 == ExtVec) {
23734 SV1 = ExtVec;
23735 for (int i = 0; i != NumOpElts; ++i)
23736 Mask.push_back(i + ExtIdx + NumElts);
23737 } else {
23738 return SDValue();
23739 }
23740 }
23741
23742 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23743 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23744 DAG.getBitcast(VT, SV1), Mask, DAG);
23745}
23746
23747 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23748 unsigned CastOpcode = N->getOperand(0).getOpcode();
23749 switch (CastOpcode) {
23750 case ISD::SINT_TO_FP:
23751 case ISD::UINT_TO_FP:
23752 case ISD::FP_TO_SINT:
23753 case ISD::FP_TO_UINT:
23754 // TODO: Allow more opcodes?
23755 // case ISD::BITCAST:
23756 // case ISD::TRUNCATE:
23757 // case ISD::ZERO_EXTEND:
23758 // case ISD::SIGN_EXTEND:
23759 // case ISD::FP_EXTEND:
23760 break;
23761 default:
23762 return SDValue();
23763 }
23764
23765 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23766 if (!SrcVT.isVector())
23767 return SDValue();
23768
23769 // All operands of the concat must be the same kind of cast from the same
23770 // source type.
23771 SmallVector<SDValue, 4> SrcOps;
23772 for (SDValue Op : N->ops()) {
23773 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23774 Op.getOperand(0).getValueType() != SrcVT)
23775 return SDValue();
23776 SrcOps.push_back(Op.getOperand(0));
23777 }
23778
23779 // The wider cast must be supported by the target. This is unusual because
23780 // the operation support type parameter depends on the opcode. In addition,
23781 // check the other type in the cast to make sure this is really legal.
23782 EVT VT = N->getValueType(0);
23783 EVT SrcEltVT = SrcVT.getVectorElementType();
23784 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23785 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23786 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23787 switch (CastOpcode) {
23788 case ISD::SINT_TO_FP:
23789 case ISD::UINT_TO_FP:
23790 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23791 !TLI.isTypeLegal(VT))
23792 return SDValue();
23793 break;
23794 case ISD::FP_TO_SINT:
23795 case ISD::FP_TO_UINT:
23796 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23797 !TLI.isTypeLegal(ConcatSrcVT))
23798 return SDValue();
23799 break;
23800 default:
23801 llvm_unreachable("Unexpected cast opcode");
23802 }
23803
23804 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23805 SDLoc DL(N);
23806 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23807 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23808}
23809
23810// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23811// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23812// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
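// Illustrative sketch (hypothetical types):
//   concat_vectors (v2i32 vector_shuffle<1,0> a, undef), v2i32:a
//   --> vector_shuffle<1,0,0,1> (v4i32 concat_vectors a, undef), undef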
23813 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23814 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23815 bool LegalOperations) {
23816 EVT VT = N->getValueType(0);
23817 EVT OpVT = N->getOperand(0).getValueType();
23818 if (VT.isScalableVector())
23819 return SDValue();
23820
23821 // For now, only allow simple 2-operand concatenations.
23822 if (N->getNumOperands() != 2)
23823 return SDValue();
23824
23825 // Don't create illegal types/shuffles when not allowed to.
23826 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23827 (LegalOperations &&
23828 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23829 return SDValue();
23830
23831 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23832 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23833 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23834 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23835 // (4) and for now, the SHUFFLE_VECTOR must be unary.
23836 ShuffleVectorSDNode *SVN = nullptr;
23837 for (SDValue Op : N->ops()) {
23838 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23839 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23840 all_of(N->ops(), [CurSVN](SDValue Op) {
23841 // FIXME: can we allow UNDEF operands?
23842 return !Op.isUndef() &&
23843 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23844 })) {
23845 SVN = CurSVN;
23846 break;
23847 }
23848 }
23849 if (!SVN)
23850 return SDValue();
23851
23852 // We are going to pad the shuffle operands, so any index that was picking
23853 // from the second operand must be adjusted.
23854 SmallVector<int, 16> AdjustedMask;
23855 AdjustedMask.reserve(SVN->getMask().size());
23856 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23857 append_range(AdjustedMask, SVN->getMask());
23858
23859 // Identity masks for the operands of the (padded) shuffle.
23860 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23861 MutableArrayRef<int> FirstShufOpIdentityMask =
23862 MutableArrayRef<int>(IdentityMask)
23863 .take_front(OpVT.getVectorNumElements());
23864 MutableArrayRef<int> SecondShufOpIdentityMask =
23865 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23866 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23867 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23868 OpVT.getVectorNumElements());
23869
23870 // New combined shuffle mask.
23871 SmallVector<int, 32> Mask;
23872 Mask.reserve(VT.getVectorNumElements());
23873 for (SDValue Op : N->ops()) {
23874 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23875 if (Op.getNode() == SVN) {
23876 append_range(Mask, AdjustedMask);
23877 continue;
23878 }
23879 if (Op == SVN->getOperand(0)) {
23880 append_range(Mask, FirstShufOpIdentityMask);
23881 continue;
23882 }
23883 if (Op == SVN->getOperand(1)) {
23884 append_range(Mask, SecondShufOpIdentityMask);
23885 continue;
23886 }
23887 llvm_unreachable("Unexpected operand!");
23888 }
23889
23890 // Don't create illegal shuffle masks.
23891 if (!TLI.isShuffleMaskLegal(Mask, VT))
23892 return SDValue();
23893
23894 // Pad the shuffle operands with UNDEF.
23895 SDLoc dl(N);
23896 std::array<SDValue, 2> ShufOps;
23897 for (auto I : zip(SVN->ops(), ShufOps)) {
23898 SDValue ShufOp = std::get<0>(I);
23899 SDValue &NewShufOp = std::get<1>(I);
23900 if (ShufOp.isUndef())
23901 NewShufOp = DAG.getUNDEF(VT);
23902 else {
23903 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23904 DAG.getUNDEF(OpVT));
23905 ShufOpParts[0] = ShufOp;
23906 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23907 }
23908 }
23909 // Finally, create the new wide shuffle.
23910 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23911}
23912
23913SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23914 // If we only have one input vector, we don't need to do any concatenation.
23915 if (N->getNumOperands() == 1)
23916 return N->getOperand(0);
23917
23918 // Check if all of the operands are undefs.
23919 EVT VT = N->getValueType(0);
23920 if (ISD::allOperandsUndef(N))
23921 return DAG.getUNDEF(VT);
23922
23923 // Optimize concat_vectors where all but the first of the vectors are undef.
23924 if (all_of(drop_begin(N->ops()),
23925 [](const SDValue &Op) { return Op.isUndef(); })) {
23926 SDValue In = N->getOperand(0);
23927 assert(In.getValueType().isVector() && "Must concat vectors");
23928
23929 // If the input is a concat_vectors, just make a larger concat by padding
23930 // with smaller undefs.
23931 //
23932 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
23933 // here could cause an infinite loop. That legalizing happens when LegalDAG
23934 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
23935 // scalable.
23936 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23937 !(LegalDAG && In.getValueType().isScalableVector())) {
23938 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23939 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23940 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23941 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23942 }
23943
23944 SDValue Scalar = peekThroughOneUseBitcasts(In);
23945
23946 // concat_vectors(scalar_to_vector(scalar), undef) ->
23947 // scalar_to_vector(scalar)
23948 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23949 Scalar.hasOneUse()) {
23950 EVT SVT = Scalar.getValueType().getVectorElementType();
23951 if (SVT == Scalar.getOperand(0).getValueType())
23952 Scalar = Scalar.getOperand(0);
23953 }
23954
23955 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23956 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
23957 // If the bitcast type isn't legal, it might be a trunc of a legal type;
23958 // look through the trunc so we can still do the transform:
23959 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23960 if (Scalar->getOpcode() == ISD::TRUNCATE &&
23961 !TLI.isTypeLegal(Scalar.getValueType()) &&
23962 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23963 Scalar = Scalar->getOperand(0);
23964
23965 EVT SclTy = Scalar.getValueType();
23966
23967 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23968 return SDValue();
23969
23970 // Bail out if the vector size is not a multiple of the scalar size.
23971 if (VT.getSizeInBits() % SclTy.getSizeInBits())
23972 return SDValue();
23973
23974 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23975 if (VNTNumElms < 2)
23976 return SDValue();
23977
23978 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23979 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23980 return SDValue();
23981
23982 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23983 return DAG.getBitcast(VT, Res);
23984 }
23985 }
23986
23987 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
23988 // We have already tested above for an UNDEF only concatenation.
23989 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23990 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
23991 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23992 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23993 };
23994 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23995 SmallVector<SDValue, 8> Opnds;
23996 EVT SVT = VT.getScalarType();
23997
23998 EVT MinVT = SVT;
23999 if (!SVT.isFloatingPoint()) {
24000 // If the BUILD_VECTORs are built from integers, they may have different
24001 // operand types. Get the smallest type and truncate all operands to it.
24002 bool FoundMinVT = false;
24003 for (const SDValue &Op : N->ops())
24004 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24005 EVT OpSVT = Op.getOperand(0).getValueType();
24006 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24007 FoundMinVT = true;
24008 }
24009 assert(FoundMinVT && "Concat vector type mismatch");
24010 }
24011
24012 for (const SDValue &Op : N->ops()) {
24013 EVT OpVT = Op.getValueType();
24014 unsigned NumElts = OpVT.getVectorNumElements();
24015
24016 if (ISD::UNDEF == Op.getOpcode())
24017 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24018
24019 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24020 if (SVT.isFloatingPoint()) {
24021 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24022 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24023 } else {
24024 for (unsigned i = 0; i != NumElts; ++i)
24025 Opnds.push_back(
24026 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24027 }
24028 }
24029 }
24030
24031 assert(VT.getVectorNumElements() == Opnds.size() &&
24032 "Concat vector type mismatch");
24033 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24034 }
24035
24036 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24037 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24038 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24039 return V;
24040
24041 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24042 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24043 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24044 return V;
24045
24046 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24047 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24048 return V;
24049 }
24050
24051 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24052 return V;
24053
24054 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24055 N, DAG, TLI, LegalTypes, LegalOperations))
24056 return V;
24057
24058 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24059 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24060 // operands and look for a CONCAT operations that place the incoming vectors
24061 // at the exact same location.
24062 //
24063 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
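// e.g. (v8i32 concat (v4i32 extract_subvector X, 0),
//                    (v4i32 extract_subvector X, 4)) --> v8i32 X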
24064 SDValue SingleSource = SDValue();
24065 unsigned PartNumElem =
24066 N->getOperand(0).getValueType().getVectorMinNumElements();
24067
24068 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24069 SDValue Op = N->getOperand(i);
24070
24071 if (Op.isUndef())
24072 continue;
24073
24074 // Check if this is the identity extract:
24075 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24076 return SDValue();
24077
24078 // Find the single incoming vector for the extract_subvector.
24079 if (SingleSource.getNode()) {
24080 if (Op.getOperand(0) != SingleSource)
24081 return SDValue();
24082 } else {
24083 SingleSource = Op.getOperand(0);
24084
24085 // Check the source type is the same as the type of the result.
24086 // If not, this concat may extend the vector, so we can not
24087 // optimize it away.
24088 if (SingleSource.getValueType() != N->getValueType(0))
24089 return SDValue();
24090 }
24091
24092 // Check that we are reading from the identity index.
24093 unsigned IdentityIndex = i * PartNumElem;
24094 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24095 return SDValue();
24096 }
24097
24098 if (SingleSource.getNode())
24099 return SingleSource;
24100
24101 return SDValue();
24102}
24103
24104// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24105 // if the subvector can be sourced for free.
24106 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24107 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24108 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24109 return V.getOperand(1);
24110 }
24111 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24112 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24113 V.getOperand(0).getValueType() == SubVT &&
24114 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24115 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24116 return V.getOperand(SubIdx);
24117 }
24118 return SDValue();
24119}
24120
24121 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24122 SelectionDAG &DAG,
24123 bool LegalOperations) {
24124 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24125 SDValue BinOp = Extract->getOperand(0);
24126 unsigned BinOpcode = BinOp.getOpcode();
24127 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24128 return SDValue();
24129
24130 EVT VecVT = BinOp.getValueType();
24131 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24132 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24133 return SDValue();
24134
24135 SDValue Index = Extract->getOperand(1);
24136 EVT SubVT = Extract->getValueType(0);
24137 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24138 return SDValue();
24139
24140 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24141 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24142
24143 // TODO: We could handle the case where only 1 operand is being inserted by
24144 // creating an extract of the other operand, but that requires checking
24145 // number of uses and/or costs.
24146 if (!Sub0 || !Sub1)
24147 return SDValue();
24148
24149 // We are inserting both operands of the wide binop only to extract back
24150 // to the narrow vector size. Eliminate all of the insert/extract:
24151 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24152 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24153 BinOp->getFlags());
24154}
24155
24156/// If we are extracting a subvector produced by a wide binary operator try
24157/// to use a narrow binary operator and/or avoid concatenation and extraction.
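/// e.g. (when the narrow op is legal and extraction is cheap):
///   (v4i32 extract_subvector (xor (v8i32 X), (v8i32 Y)), 4)
///     --> (v4i32 xor (extract_subvector X, 4), (extract_subvector Y, 4))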
24158 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24159 bool LegalOperations) {
24160 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24161 // some of these bailouts with other transforms.
24162
24163 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24164 return V;
24165
24166 // The extract index must be a constant, so we can map it to a concat operand.
24167 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24168 if (!ExtractIndexC)
24169 return SDValue();
24170
24171 // We are looking for an optionally bitcasted wide vector binary operator
24172 // feeding an extract subvector.
24173 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24174 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24175 unsigned BOpcode = BinOp.getOpcode();
24176 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24177 return SDValue();
24178
24179 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24180 // reduced to the unary fneg when it is visited, and we probably want to deal
24181 // with fneg in a target-specific way.
24182 if (BOpcode == ISD::FSUB) {
24183 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24184 if (C && C->getValueAPF().isNegZero())
24185 return SDValue();
24186 }
24187
24188 // The binop must be a vector type, so we can extract some fraction of it.
24189 EVT WideBVT = BinOp.getValueType();
24190 // The optimisations below currently assume we are dealing with fixed length
24191 // vectors. It is possible to add support for scalable vectors, but at the
24192 // moment we've done no analysis to prove whether they are profitable or not.
24193 if (!WideBVT.isFixedLengthVector())
24194 return SDValue();
24195
24196 EVT VT = Extract->getValueType(0);
24197 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24198 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24199 "Extract index is not a multiple of the vector length.");
24200
24201 // Bail out if this is not a proper multiple width extraction.
24202 unsigned WideWidth = WideBVT.getSizeInBits();
24203 unsigned NarrowWidth = VT.getSizeInBits();
24204 if (WideWidth % NarrowWidth != 0)
24205 return SDValue();
24206
24207 // Bail out if we are extracting a fraction of a single operation. This can
24208 // occur because we potentially looked through a bitcast of the binop.
24209 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24210 unsigned WideNumElts = WideBVT.getVectorNumElements();
24211 if (WideNumElts % NarrowingRatio != 0)
24212 return SDValue();
24213
24214 // Bail out if the target does not support a narrower version of the binop.
24215 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24216 WideNumElts / NarrowingRatio);
24217 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24218 LegalOperations))
24219 return SDValue();
24220
24221 // If extraction is cheap, we don't need to look at the binop operands
24222 // for concat ops. The narrow binop alone makes this transform profitable.
24223 // We can't just reuse the original extract index operand because we may have
24224 // bitcasted.
24225 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24226 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24227 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24228 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24229 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24230 SDLoc DL(Extract);
24231 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24232 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24233 BinOp.getOperand(0), NewExtIndex);
24234 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24235 BinOp.getOperand(1), NewExtIndex);
24236 SDValue NarrowBinOp =
24237 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24238 return DAG.getBitcast(VT, NarrowBinOp);
24239 }
24240
24241 // Only handle the case where we are doubling and then halving. A larger ratio
24242 // may require more than two narrow binops to replace the wide binop.
24243 if (NarrowingRatio != 2)
24244 return SDValue();
24245
24246 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24247 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24248 // flavors, but no other 256-bit integer support. This could be extended to
24249 // handle any binop, but that may require fixing/adding other folds to avoid
24250 // codegen regressions.
24251 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24252 return SDValue();
24253
24254 // We need at least one concatenation operation of a binop operand to make
24255 // this transform worthwhile. The concat must double the input vector sizes.
24256 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24257 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24258 return V.getOperand(ConcatOpNum);
24259 return SDValue();
24260 };
24261 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24262 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24263
24264 if (SubVecL || SubVecR) {
24265 // If a binop operand was not the result of a concat, we must extract a
24266 // half-sized operand for our new narrow binop:
24267 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24268 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24269 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24270 SDLoc DL(Extract);
24271 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24272 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24273 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24274 BinOp.getOperand(0), IndexC);
24275
24276 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24277 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24278 BinOp.getOperand(1), IndexC);
24279
24280 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24281 return DAG.getBitcast(VT, NarrowBinOp);
24282 }
24283
24284 return SDValue();
24285}
24286
24287/// If we are extracting a subvector from a wide vector load, convert to a
24288/// narrow load to eliminate the extraction:
24289/// (extract_subvector (load wide vector)) --> (load narrow vector)
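/// e.g. (little-endian) (v2i32 extract_subvector (v8i32 load p), 4)
///        --> (v2i32 load p+16)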
24290 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24291 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24292 if (DAG.getDataLayout().isBigEndian())
24293 return SDValue();
24294
24295 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24296 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24297 return SDValue();
24298
24299 // Allow targets to opt-out.
24300 EVT VT = Extract->getValueType(0);
24301
24302 // We can only create byte sized loads.
24303 if (!VT.isByteSized())
24304 return SDValue();
24305
24306 unsigned Index = Extract->getConstantOperandVal(1);
24307 unsigned NumElts = VT.getVectorMinNumElements();
24308 // A fixed length vector being extracted from a scalable vector
24309 // may not be any *smaller* than the scalable one.
24310 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24311 return SDValue();
24312
24313 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24314 // multiple of the minimum number of elements in the result type.
24315 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24316 "multiple of the result's element count");
24317
24318 // It's fine to use TypeSize here as we know the offset will not be negative.
24319 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24320
24321 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24322 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24323 return SDValue();
24324
24325 // The narrow load will be offset from the base address of the old load if
24326 // we are extracting from something besides index 0 (little-endian).
24327 SDLoc DL(Extract);
24328
24329 // TODO: Use "BaseIndexOffset" to make this more effective.
24330 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24331
24332 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24333 MachineFunction &MF = DAG.getMachineFunction();
24334 MachineMemOperand *MMO;
24335 if (Offset.isScalable()) {
24336 MachinePointerInfo MPI =
24337 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24338 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24339 } else
24340 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24341 StoreSize);
24342
24343 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24344 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24345 return NewLd;
24346}
24347
24348/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24349/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24350/// EXTRACT_SUBVECTOR(Op?, ?),
24351/// Mask'))
24352/// iff it is legal and profitable to do so. Notably, the trimmed mask
24353/// (containing only the elements that are extracted)
24354/// must reference at most two subvectors.
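/// e.g. (v4i32 extract_subvector (v8i32 vector_shuffle X, Y, <0,1,8,9,u,u,u,u>), 0)
///        --> (v4i32 vector_shuffle (extract_subvector X, 0),
///                                  (extract_subvector Y, 0), <0,1,4,5>)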
24355 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24356 SelectionDAG &DAG,
24357 const TargetLowering &TLI,
24358 bool LegalOperations) {
24359 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24360 "Must only be called on EXTRACT_SUBVECTOR's");
24361
24362 SDValue N0 = N->getOperand(0);
24363
24364 // Only deal with non-scalable vectors.
24365 EVT NarrowVT = N->getValueType(0);
24366 EVT WideVT = N0.getValueType();
24367 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24368 return SDValue();
24369
24370 // The operand must be a shufflevector.
24371 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24372 if (!WideShuffleVector)
24373 return SDValue();
24374
24375 // The old shuffle needs to go away.
24376 if (!WideShuffleVector->hasOneUse())
24377 return SDValue();
24378
24379 // And the narrow shufflevector that we'll form must be legal.
24380 if (LegalOperations &&
24381 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, NarrowVT))
24382 return SDValue();
24383
24384 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24385 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24386 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24387 "Extract index is not a multiple of the output vector length.");
24388
24389 int WideNumElts = WideVT.getVectorNumElements();
24390
24391 SmallVector<int, 16> NewMask;
24392 NewMask.reserve(NumEltsExtracted);
24393 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24394 DemandedSubvectors;
24395
24396 // Try to decode the wide mask into narrow mask from at most two subvectors.
24397 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24398 NumEltsExtracted)) {
24399 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24400 "Out-of-bounds shuffle mask?");
24401
24402 if (M < 0) {
24403 // Does not depend on operands, does not require adjustment.
24404 NewMask.emplace_back(M);
24405 continue;
24406 }
24407
24408 // From which operand of the shuffle does this shuffle mask element pick?
24409 int WideShufOpIdx = M / WideNumElts;
24410 // Which element of that operand is picked?
24411 int OpEltIdx = M % WideNumElts;
24412
24413 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24414 "Shuffle mask vector decomposition failure.");
24415
24416 // And which NumEltsExtracted-sized subvector of that operand is that?
24417 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24418 // And which element within that subvector of that operand is that?
24419 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24420
24421 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24422 "Shuffle mask subvector decomposition failure.");
24423
24424 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24425 WideShufOpIdx * WideNumElts) == M &&
24426 "Shuffle mask full decomposition failure.");
24427
24428 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24429
24430 if (Op.isUndef()) {
24431 // Picking from an undef operand. Let's adjust mask instead.
24432 NewMask.emplace_back(-1);
24433 continue;
24434 }
24435
24436 const std::pair<SDValue, int> DemandedSubvector =
24437 std::make_pair(Op, OpSubvecIdx);
24438
24439 if (DemandedSubvectors.insert(DemandedSubvector)) {
24440 if (DemandedSubvectors.size() > 2)
24441 return SDValue(); // We can't handle more than two subvectors.
24442 // How many elements into the WideVT does this subvector start?
24443 int Index = NumEltsExtracted * OpSubvecIdx;
24444 // Bail out if the extraction isn't going to be cheap.
24445 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24446 return SDValue();
24447 }
24448
24449 // Ok, but from which operand of the new shuffle will this element pick?
24450 int NewOpIdx =
24451 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24452 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24453
24454 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24455 NewMask.emplace_back(AdjM);
24456 }
24457 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24458 assert(DemandedSubvectors.size() <= 2 &&
24459 "Should have ended up demanding at most two subvectors.");
24460
24461 // Did we discover that the shuffle does not actually depend on operands?
24462 if (DemandedSubvectors.empty())
24463 return DAG.getUNDEF(NarrowVT);
24464
24465 // Profitability check: only deal with extractions from the first subvector
24466 // unless the mask becomes an identity mask.
24467 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24468 any_of(NewMask, [](int M) { return M < 0; }))
24469 for (auto &DemandedSubvector : DemandedSubvectors)
24470 if (DemandedSubvector.second != 0)
24471 return SDValue();
24472
24473 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24474 // operand[s]/index[es], so there is no point in checking for its legality.
24475
24476 // Do not turn a legal shuffle into an illegal one.
24477 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24478 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24479 return SDValue();
24480
24481 SDLoc DL(N);
24482
24483 SmallVector<SDValue, 2> NewOps;
24484 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24485 &DemandedSubvector : DemandedSubvectors) {
24486 // How many elements into the WideVT does this subvector start?
24487 int Index = NumEltsExtracted * DemandedSubvector.second;
24488 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24489 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24490 DemandedSubvector.first, IndexC));
24491 }
24492 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24493 "Should end up with either one or two ops");
24494
24495 // If we ended up with only one operand, pad with an undef.
24496 if (NewOps.size() == 1)
24497 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24498
24499 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24500}
24501
24502SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24503 EVT NVT = N->getValueType(0);
24504 SDValue V = N->getOperand(0);
24505 uint64_t ExtIdx = N->getConstantOperandVal(1);
24506 SDLoc DL(N);
24507
24508 // Extract from UNDEF is UNDEF.
24509 if (V.isUndef())
24510 return DAG.getUNDEF(NVT);
24511
24512 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24513 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24514 return NarrowLoad;
24515
24516 // Combine an extract of an extract into a single extract_subvector.
24517 // ext (ext X, C), 0 --> ext X, C
24518 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24519 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24520 V.getConstantOperandVal(1)) &&
24521 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24522 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24523 V.getOperand(1));
24524 }
24525 }
24526
24527 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24528 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24529 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24530 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24531 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24532
24533 // extract_subvector(insert_subvector(x,y,c1),c2)
24534 // --> extract_subvector(y,c2-c1)
24535 // iff we're just extracting from the inserted subvector.
24536 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24537 SDValue InsSub = V.getOperand(1);
24538 EVT InsSubVT = InsSub.getValueType();
24539 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24540 unsigned InsIdx = V.getConstantOperandVal(2);
24541 unsigned NumSubElts = NVT.getVectorMinNumElements();
24542 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24543 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24544 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24545 V.getValueType().isFixedLengthVector())
24546 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24547 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24548 }
24549
24550 // Try to move vector bitcast after extract_subv by scaling extraction index:
24551 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24552 if (V.getOpcode() == ISD::BITCAST &&
24553 V.getOperand(0).getValueType().isVector() &&
24554 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24555 SDValue SrcOp = V.getOperand(0);
24556 EVT SrcVT = SrcOp.getValueType();
24557 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24558 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24559 if ((SrcNumElts % DestNumElts) == 0) {
24560 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24561 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24562 EVT NewExtVT =
24563 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24564 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24565 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24566 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24567 V.getOperand(0), NewIndex);
24568 return DAG.getBitcast(NVT, NewExtract);
24569 }
24570 }
24571 if ((DestNumElts % SrcNumElts) == 0) {
24572 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24573 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24574 ElementCount NewExtEC =
24575 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24576 EVT ScalarVT = SrcVT.getScalarType();
24577 if ((ExtIdx % DestSrcRatio) == 0) {
24578 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24579 EVT NewExtVT =
24580 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24581 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24582 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24583 SDValue NewExtract =
24584 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24585 V.getOperand(0), NewIndex);
24586 return DAG.getBitcast(NVT, NewExtract);
24587 }
24588 if (NewExtEC.isScalar() &&
24589 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24590 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24591 SDValue NewExtract =
24592 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24593 V.getOperand(0), NewIndex);
24594 return DAG.getBitcast(NVT, NewExtract);
24595 }
24596 }
24597 }
24598 }
24599 }
24600
24601 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24602 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24603 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24604 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24605 "Concat and extract subvector do not change element type");
24606 assert((ExtIdx % ExtNumElts) == 0 &&
24607 "Extract index is not a multiple of the input vector length.");
24608
24609 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24610 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24611
24612 // If the concatenated source types match this extract, it's a direct
24613 // simplification:
24614 // extract_subvec (concat V1, V2, ...), i --> Vi
24615 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24616 return V.getOperand(ConcatOpIdx);
24617
24618 // If the concatenated source vectors are a multiple length of this extract,
24619 // then extract a fraction of one of those source vectors directly from a
24620 // concat operand. Example:
24621 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24622 // v2i8 extract_subvec v8i8 Y, 6
24623 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24624 ConcatSrcNumElts % ExtNumElts == 0) {
24625 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24626 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24627 "Trying to extract from >1 concat operand?");
24628 assert(NewExtIdx % ExtNumElts == 0 &&
24629 "Extract index is not a multiple of the input vector length.");
24630 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24631 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24632 V.getOperand(ConcatOpIdx), NewIndexC);
24633 }
24634 }
24635
24636 if (SDValue V =
24637 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24638 return V;
24639
24640 V = peekThroughBitcasts(V);
24641 
24642 // If the input is a build vector, try to make a smaller build vector.
24643 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24644 EVT InVT = V.getValueType();
24645 unsigned ExtractSize = NVT.getSizeInBits();
24646 unsigned EltSize = InVT.getScalarSizeInBits();
24647 // Only do this if we won't split any elements.
24648 if (ExtractSize % EltSize == 0) {
24649 unsigned NumElems = ExtractSize / EltSize;
24650 EVT EltVT = InVT.getVectorElementType();
24651 EVT ExtractVT =
24652 NumElems == 1 ? EltVT
24653 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24654 if ((Level < AfterLegalizeDAG ||
24655 (NumElems == 1 ||
24656 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24657 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24658 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24659
24660 if (NumElems == 1) {
24661 SDValue Src = V->getOperand(IdxVal);
24662 if (EltVT != Src.getValueType())
24663 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24664 return DAG.getBitcast(NVT, Src);
24665 }
24666
24667 // Extract the pieces from the original build_vector.
24668 SDValue BuildVec =
24669 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24670 return DAG.getBitcast(NVT, BuildVec);
24671 }
24672 }
24673 }
24674
24675 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24676 // Handle only simple case where vector being inserted and vector
24677 // being extracted are of same size.
24678 EVT SmallVT = V.getOperand(1).getValueType();
24679 if (!NVT.bitsEq(SmallVT))
24680 return SDValue();
24681
24682 // Combine:
24683 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24684 // Into:
24685 // indices are equal or bit offsets are equal => V1
24686 // otherwise => (extract_subvec V1, ExtIdx)
24687 uint64_t InsIdx = V.getConstantOperandVal(2);
24688 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24689 ExtIdx * NVT.getScalarSizeInBits()) {
24690 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24691 return SDValue();
24692
24693 return DAG.getBitcast(NVT, V.getOperand(1));
24694 }
24695 return DAG.getNode(
24696 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24697 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24698 N->getOperand(1));
24699 }
24700
24701 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24702 return NarrowBOp;
24703
24704 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24705 return SDValue(N, 0);
24706
24707 return SDValue();
24708}
24709
24710/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24711/// followed by concatenation. Narrow vector ops may have better performance
24712/// than wide ops, and this can unlock further narrowing of other vector ops.
24713/// Targets can invert this transform later if it is not profitable.
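/// e.g. v4i32 shuffle<0,4,1,5> (concat (v2i32 X), undef), (concat (v2i32 Y), undef)
///        --> concat (v2i32 shuffle<0,2> X, Y), (v2i32 shuffle<1,3> X, Y)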
24714 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24715 SelectionDAG &DAG) {
24716 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24717 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24718 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24719 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24720 return SDValue();
24721
24722 // Split the wide shuffle mask into halves. Any mask element that is accessing
24723 // operand 1 is offset down to account for narrowing of the vectors.
24724 ArrayRef<int> Mask = Shuf->getMask();
24725 EVT VT = Shuf->getValueType(0);
24726 unsigned NumElts = VT.getVectorNumElements();
24727 unsigned HalfNumElts = NumElts / 2;
24728 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24729 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24730 for (unsigned i = 0; i != NumElts; ++i) {
24731 if (Mask[i] == -1)
24732 continue;
24733 // If we reference the upper (undef) subvector then the element is undef.
24734 if ((Mask[i] % NumElts) >= HalfNumElts)
24735 continue;
24736 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24737 if (i < HalfNumElts)
24738 Mask0[i] = M;
24739 else
24740 Mask1[i - HalfNumElts] = M;
24741 }
24742
24743 // Ask the target if this is a valid transform.
24744 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24745 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24746 HalfNumElts);
24747 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24748 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24749 return SDValue();
24750
24751 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24752 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24753 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24754 SDLoc DL(Shuf);
24755 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24756 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24757 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24758}
24759
24760// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24761// or turn a shuffle of a single concat into simpler shuffle then concat.
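// e.g. with v2i32 concat operands:
//   shuffle<4,5,2,3> (concat A, B), (concat C, D) --> concat C, B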
24762 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24763 EVT VT = N->getValueType(0);
24764 unsigned NumElts = VT.getVectorNumElements();
24765
24766 SDValue N0 = N->getOperand(0);
24767 SDValue N1 = N->getOperand(1);
24768 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24769 ArrayRef<int> Mask = SVN->getMask();
24770
24771 SmallVector<SDValue, 4> Ops;
24772 EVT ConcatVT = N0.getOperand(0).getValueType();
24773 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24774 unsigned NumConcats = NumElts / NumElemsPerConcat;
24775
24776 auto IsUndefMaskElt = [](int i) { return i == -1; };
24777
24778 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24779 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24780 // half vector elements.
24781 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24782 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24783 IsUndefMaskElt)) {
24784 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24785 N0.getOperand(1),
24786 Mask.slice(0, NumElemsPerConcat));
24787 N1 = DAG.getUNDEF(ConcatVT);
24788 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24789 }
24790
24791 // Look at every vector that's inserted. We're looking for exact
24792 // subvector-sized copies from a concatenated vector
24793 for (unsigned I = 0; I != NumConcats; ++I) {
24794 unsigned Begin = I * NumElemsPerConcat;
24795 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24796
24797 // Make sure we're dealing with a copy.
24798 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24799 Ops.push_back(DAG.getUNDEF(ConcatVT));
24800 continue;
24801 }
24802
24803 int OpIdx = -1;
24804 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24805 if (IsUndefMaskElt(SubMask[i]))
24806 continue;
24807 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24808 return SDValue();
24809 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24810 if (0 <= OpIdx && EltOpIdx != OpIdx)
24811 return SDValue();
24812 OpIdx = EltOpIdx;
24813 }
24814 assert(0 <= OpIdx && "Unknown concat_vectors op");
24815
24816 if (OpIdx < (int)N0.getNumOperands())
24817 Ops.push_back(N0.getOperand(OpIdx));
24818 else
24819 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24820 }
24821
24822 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24823}
24824
24825// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24826// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24827//
24828// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24829// a simplification in some sense, but it isn't appropriate in general: some
24830// BUILD_VECTORs are substantially cheaper than others. The general case
24831// of a BUILD_VECTOR requires inserting each element individually (or
24832// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24833// all constants is a single constant pool load. A BUILD_VECTOR where each
24834// element is identical is a splat. A BUILD_VECTOR where most of the operands
24835// are undef lowers to a small number of element insertions.
24836//
24837// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24838// We don't fold shuffles where one side is a non-zero constant, and we don't
24839// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24840// non-constant operands. This seems to work out reasonably well in practice.
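// e.g. shuffle<0,4,1,5> (build_vector A,B,C,D), (build_vector E,F,G,H)
//      --> build_vector A,E,B,F (subject to the heuristics above)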
24841 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24842 SelectionDAG &DAG,
24843 const TargetLowering &TLI) {
24844 EVT VT = SVN->getValueType(0);
24845 unsigned NumElts = VT.getVectorNumElements();
24846 SDValue N0 = SVN->getOperand(0);
24847 SDValue N1 = SVN->getOperand(1);
24848
24849 if (!N0->hasOneUse())
24850 return SDValue();
24851
24852 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
24853 // discussed above.
24854 if (!N1.isUndef()) {
24855 if (!N1->hasOneUse())
24856 return SDValue();
24857
24858 bool N0AnyConst = isAnyConstantBuildVector(N0);
24859 bool N1AnyConst = isAnyConstantBuildVector(N1);
24860 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24861 return SDValue();
24862 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24863 return SDValue();
24864 }
24865
24866 // If both inputs are splats of the same value then we can safely merge this
24867 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24868 bool IsSplat = false;
24869 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24870 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24871 if (BV0 && BV1)
24872 if (SDValue Splat0 = BV0->getSplatValue())
24873 IsSplat = (Splat0 == BV1->getSplatValue());
24874
24875 SmallVector<SDValue, 8> Ops;
24876 SmallSet<SDValue, 16> DuplicateOps;
24877 for (int M : SVN->getMask()) {
24878 SDValue Op = DAG.getUNDEF(VT.getScalarType());
24879 if (M >= 0) {
24880 int Idx = M < (int)NumElts ? M : M - NumElts;
24881 SDValue &S = (M < (int)NumElts ? N0 : N1);
24882 if (S.getOpcode() == ISD::BUILD_VECTOR) {
24883 Op = S.getOperand(Idx);
24884 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24885 SDValue Op0 = S.getOperand(0);
24886 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24887 } else {
24888 // Operand can't be combined - bail out.
24889 return SDValue();
24890 }
24891 }
24892
24893 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24894 // generating a splat; semantically, this is fine, but it's likely to
24895 // generate low-quality code if the target can't reconstruct an appropriate
24896 // shuffle.
24897 if (!Op.isUndef() && !isIntOrFPConstant(Op))
24898 if (!IsSplat && !DuplicateOps.insert(Op).second)
24899 return SDValue();
24900
24901 Ops.push_back(Op);
24902 }
24903
24904 // BUILD_VECTOR requires all inputs to be of the same type, find the
24905 // maximum type and extend them all.
24906 EVT SVT = VT.getScalarType();
24907 if (SVT.isInteger())
24908 for (SDValue &Op : Ops)
24909 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24910 if (SVT != VT.getScalarType())
24911 for (SDValue &Op : Ops)
24912 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24913 : (TLI.isZExtFree(Op.getValueType(), SVT)
24914 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24915 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24916 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24917}
24918
24919// Match shuffles that can be converted to *_vector_extend_in_reg.
24920// This is often generated during legalization.
24921// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24922// and returns the EVT to which the extension should be performed.
24923// NOTE: this assumes that the src is the first operand of the shuffle.
24924 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24925 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24926 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24927 bool LegalOperations) {
24928 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24929
24930 // TODO Add support for big-endian when we have a test case.
24931 if (!VT.isInteger() || IsBigEndian)
24932 return std::nullopt;
24933
24934 unsigned NumElts = VT.getVectorNumElements();
24935 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24936
24937 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
24938 // power-of-2 extensions as they are the most likely.
24939 // FIXME: should try Scale == NumElts case too,
24940 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24941 // The vector width must be a multiple of Scale.
24942 if (NumElts % Scale != 0)
24943 continue;
24944
24945 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24946 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24947
24948 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24949 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24950 continue;
24951
24952 if (Match(Scale))
24953 return OutVT;
24954 }
24955
24956 return std::nullopt;
24957}
24958
24959// Match shuffles that can be converted to any_vector_extend_in_reg.
24960// This is often generated during legalization.
24961// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24962 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24963 SelectionDAG &DAG,
24964 const TargetLowering &TLI,
24965 bool LegalOperations) {
24966 EVT VT = SVN->getValueType(0);
24967 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24968
24969 // TODO Add support for big-endian when we have a test case.
24970 if (!VT.isInteger() || IsBigEndian)
24971 return SDValue();
24972
24973 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
24974 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24975 Mask = SVN->getMask()](unsigned Scale) {
24976 for (unsigned i = 0; i != NumElts; ++i) {
24977 if (Mask[i] < 0)
24978 continue;
24979 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24980 continue;
24981 return false;
24982 }
24983 return true;
24984 };
24985
24986 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24987 SDValue N0 = SVN->getOperand(0);
24988 // Never create an illegal type. Only create unsupported operations if we
24989 // are pre-legalization.
24990 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24991 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24992 if (!OutVT)
24993 return SDValue();
24994 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24995}
24996
24997// Match shuffles that can be converted to zero_extend_vector_inreg.
24998// This is often generated during legalization.
24999// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25000 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25001 SelectionDAG &DAG,
25002 const TargetLowering &TLI,
25003 bool LegalOperations) {
25004 bool LegalTypes = true;
25005 EVT VT = SVN->getValueType(0);
25006 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25007 unsigned NumElts = VT.getVectorNumElements();
25008 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25009
25010 // TODO: add support for big-endian when we have a test case.
25011 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25012 if (!VT.isInteger() || IsBigEndian)
25013 return SDValue();
25014
25015 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25016 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25017 for (int &Indice : Mask) {
25018 if (Indice < 0)
25019 continue;
25020 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25021 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25022 Fn(Indice, OpIdx, OpEltIdx);
25023 }
25024 };
25025
25026 // Which elements of which operand does this shuffle demand?
25027 std::array<APInt, 2> OpsDemandedElts;
25028 for (APInt &OpDemandedElts : OpsDemandedElts)
25029 OpDemandedElts = APInt::getZero(NumElts);
25030 ForEachDecomposedIndice(
25031 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25032 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25033 });
25034
25035 // Element-wise(!), which of these demanded elements are known to be zero?
25036 std::array<APInt, 2> OpsKnownZeroElts;
25037 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25038 std::get<2>(I) =
25039 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25040
25041 // Manifest zeroable element knowledge in the shuffle mask.
25042 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25043 // this is a local invention, but it won't leak into DAG.
25044 // FIXME: should we not manifest them, but just check when matching?
25045 bool HadZeroableElts = false;
25046 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25047 int &Indice, int OpIdx, int OpEltIdx) {
25048 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25049 Indice = -2; // Zeroable element.
25050 HadZeroableElts = true;
25051 }
25052 });
25053
25054 // Don't proceed unless we've refined at least one zeroable mask indice.
25055 // If we didn't, then we are still trying to match the same shuffle mask
25056 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25057 // and evidently failed. Proceeding will lead to endless combine loops.
25058 if (!HadZeroableElts)
25059 return SDValue();
25060
25061 // The shuffle may be more fine-grained than we want. Widen elements first.
25062 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25063 SmallVector<int, 16> ScaledMask;
25064 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25065 assert(Mask.size() >= ScaledMask.size() &&
25066 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25067 int Prescale = Mask.size() / ScaledMask.size();
25068
25069 NumElts = ScaledMask.size();
25070 EltSizeInBits *= Prescale;
25071
25072 EVT PrescaledVT = EVT::getVectorVT(
25073 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25074 NumElts);
25075
25076 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25077 return SDValue();
25078
25079 // For example,
25080 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25081 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25082 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25083 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25084 "Unexpected mask scaling factor.");
25085 ArrayRef<int> Mask = ScaledMask;
25086 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25087 SrcElt != NumSrcElts; ++SrcElt) {
25088 // Analyze the shuffle mask in Scale-sized chunks.
25089 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25090 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25091 Mask = Mask.drop_front(MaskChunk.size());
25092 // The first indice in this chunk must be SrcElt, but not zero!
25093 // FIXME: undef should be fine, but that results in more-defined result.
25094 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25095 return false;
25096 // The rest of the indices in this chunk must be zeros.
25097 // FIXME: undef should be fine, but that results in more-defined result.
25098 if (!all_of(MaskChunk.drop_front(1),
25099 [](int Indice) { return Indice == -2; }))
25100 return false;
25101 }
25102 assert(Mask.empty() && "Did not process the whole mask?");
25103 return true;
25104 };
25105
25106 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25107 for (bool Commuted : {false, true}) {
25108 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25109 if (Commuted)
25110 ShuffleVectorSDNode::commuteMask(ScaledMask);
25111 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25112 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25113 LegalOperations);
25114 if (OutVT)
25115 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25116 DAG.getBitcast(PrescaledVT, Op)));
25117 }
25118 return SDValue();
25119}
25120
25121// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25122// each source element of a large type into the lowest elements of a smaller
25123// destination type. This is often generated during legalization.
25124// If the source node itself was a '*_extend_vector_inreg' node then we should
25125// then be able to remove it.
25126 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25127 SelectionDAG &DAG) {
25128 EVT VT = SVN->getValueType(0);
25129 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25130
25131 // TODO Add support for big-endian when we have a test case.
25132 if (!VT.isInteger() || IsBigEndian)
25133 return SDValue();
25134
25135 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25136 
25137 unsigned Opcode = N0.getOpcode();
25138 if (!ISD::isExtVecInRegOpcode(Opcode))
25139 return SDValue();
25140
25141 SDValue N00 = N0.getOperand(0);
25142 ArrayRef<int> Mask = SVN->getMask();
25143 unsigned NumElts = VT.getVectorNumElements();
25144 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25145 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25146 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25147
25148 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25149 return SDValue();
25150 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25151
25152 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25153 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25154 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25155 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25156 for (unsigned i = 0; i != NumElts; ++i) {
25157 if (Mask[i] < 0)
25158 continue;
25159 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25160 continue;
25161 return false;
25162 }
25163 return true;
25164 };
25165
25166 // At the moment we just handle the case where we've truncated back to the
25167 // same size as before the extension.
25168 // TODO: handle more extension/truncation cases as cases arise.
25169 if (EltSizeInBits != ExtSrcSizeInBits)
25170 return SDValue();
25171
25172 // We can remove *extend_vector_inreg only if the truncation happens at
25173 // the same scale as the extension.
25174 if (isTruncate(ExtScale))
25175 return DAG.getBitcast(VT, N00);
25176
25177 return SDValue();
25178}
25179
25180// Combine shuffles of splat-shuffles of the form:
25181// shuffle (shuffle V, undef, splat-mask), undef, M
25182// If splat-mask contains undef elements, we need to be careful about
25183// introducing undef's in the folded mask which are not the result of composing
25184// the masks of the shuffles.
25185 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25186 SelectionDAG &DAG) {
25187 EVT VT = Shuf->getValueType(0);
25188 unsigned NumElts = VT.getVectorNumElements();
25189
25190 if (!Shuf->getOperand(1).isUndef())
25191 return SDValue();
25192
25193 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25194 // in disguise, with all demanded elements being identical.
25195 // FIXME: this can be done per-operand.
25196 if (!Shuf->isSplat()) {
25197 APInt DemandedElts(NumElts, 0);
25198 for (int Idx : Shuf->getMask()) {
25199 if (Idx < 0)
25200 continue; // Ignore sentinel indices.
25201 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25202 DemandedElts.setBit(Idx);
25203 }
25204 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25205 APInt UndefElts;
25206 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25207 // Even if all demanded elements are splat, some of them could be undef.
25208 // Which lowest demanded element is *not* known-undef?
25209 std::optional<unsigned> MinNonUndefIdx;
25210 for (int Idx : Shuf->getMask()) {
25211 if (Idx < 0 || UndefElts[Idx])
25212 continue; // Ignore sentinel indices, and undef elements.
25213 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25214 }
25215 if (!MinNonUndefIdx)
25216 return DAG.getUNDEF(VT); // All undef - result is undef.
25217 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25218 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25219 Shuf->getMask().end());
25220 for (int &Idx : SplatMask) {
25221 if (Idx < 0)
25222 continue; // Passthrough sentinel indices.
25223 // Otherwise, just pick the lowest demanded non-undef element.
25224 // Or sentinel undef, if we know we'd pick a known-undef element.
25225 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25226 }
25227 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25228 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25229 Shuf->getOperand(1), SplatMask);
25230 }
25231 }
25232
25233 // If the inner operand is a known splat with no undefs, just return that directly.
25234 // TODO: Create DemandedElts mask from Shuf's mask.
25235 // TODO: Allow undef elements and merge with the shuffle code below.
25236 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25237 return Shuf->getOperand(0);
25238
25239 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25240 if (!Splat || !Splat->isSplat())
25241 return SDValue();
25242
25243 ArrayRef<int> ShufMask = Shuf->getMask();
25244 ArrayRef<int> SplatMask = Splat->getMask();
25245 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25246
25247 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25248 // every undef mask element in the splat-shuffle has a corresponding undef
25249 // element in the user-shuffle's mask or if the composition of mask elements
25250 // would result in undef.
25251 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25252 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25253 // In this case it is not legal to simplify to the splat-shuffle because we
25254 // may be exposing to the users of the shuffle an undef element at index 1
25255 // which was not there before the combine.
25256 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25257 // In this case the composition of masks yields SplatMask, so it's ok to
25258 // simplify to the splat-shuffle.
25259 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25260 // In this case the composed mask includes all undef elements of SplatMask
25261 // and in addition sets element zero to undef. It is safe to simplify to
25262 // the splat-shuffle.
25263 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25264 ArrayRef<int> SplatMask) {
25265 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25266 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25267 SplatMask[UserMask[i]] != -1)
25268 return false;
25269 return true;
25270 };
25271 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25272 return Shuf->getOperand(0);
25273
25274 // Create a new shuffle with a mask that is composed of the two shuffles'
25275 // masks.
25276 SmallVector<int, 32> NewMask;
25277 for (int Idx : ShufMask)
25278 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25279
25280 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25281 Splat->getOperand(0), Splat->getOperand(1),
25282 NewMask);
25283}
25284
25285// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25286// the mask can be treated as a larger type.
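// e.g. v4i32 shuffle<2,3,0,1> (bitcast (v2i64 X)), undef
//      --> bitcast (v2i64 shuffle<1,0> X, undef)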
25287 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25288 SelectionDAG &DAG,
25289 const TargetLowering &TLI,
25290 bool LegalOperations) {
25291 SDValue Op0 = SVN->getOperand(0);
25292 SDValue Op1 = SVN->getOperand(1);
25293 EVT VT = SVN->getValueType(0);
25294 if (Op0.getOpcode() != ISD::BITCAST)
25295 return SDValue();
25296 EVT InVT = Op0.getOperand(0).getValueType();
25297 if (!InVT.isVector() ||
25298 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25299 Op1.getOperand(0).getValueType() != InVT)))
25300 return SDValue();
25301 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25302 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25303 return SDValue();
25304
25305 int VTLanes = VT.getVectorNumElements();
25306 int InLanes = InVT.getVectorNumElements();
25307 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25308 (LegalOperations &&
25309 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25310 return SDValue();
25311 int Factor = VTLanes / InLanes;
25312
25313 // Check that each group of lanes in the mask are either undef or make a valid
25314 // mask for the wider lane type.
25315 ArrayRef<int> Mask = SVN->getMask();
25316 SmallVector<int> NewMask;
25317 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25318 return SDValue();
25319
25320 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25321 return SDValue();
25322
25323 // Create the new shuffle with the new mask and bitcast it back to the
25324 // original type.
25325 SDLoc DL(SVN);
25326 Op0 = Op0.getOperand(0);
25327 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25328 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25329 return DAG.getBitcast(VT, NewShuf);
25330}
25331
25332/// Combine shuffle of shuffle of the form:
25333/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
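/// e.g. shuf (shuf X, undef, <3,3,1,2>), undef, <0,1,0,1>
///        --> shuf X, undef, <3,3,3,3>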
25334 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25335 SelectionDAG &DAG) {
25336 if (!OuterShuf->getOperand(1).isUndef())
25337 return SDValue();
25338 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25339 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25340 return SDValue();
25341
25342 ArrayRef<int> OuterMask = OuterShuf->getMask();
25343 ArrayRef<int> InnerMask = InnerShuf->getMask();
25344 unsigned NumElts = OuterMask.size();
25345 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25346 SmallVector<int, 32> CombinedMask(NumElts, -1);
25347 int SplatIndex = -1;
25348 for (unsigned i = 0; i != NumElts; ++i) {
25349 // Undef lanes remain undef.
25350 int OuterMaskElt = OuterMask[i];
25351 if (OuterMaskElt == -1)
25352 continue;
25353
25354 // Peek through the shuffle masks to get the underlying source element.
25355 int InnerMaskElt = InnerMask[OuterMaskElt];
25356 if (InnerMaskElt == -1)
25357 continue;
25358
25359 // Initialize the splatted element.
25360 if (SplatIndex == -1)
25361 SplatIndex = InnerMaskElt;
25362
25363 // Non-matching index - this is not a splat.
25364 if (SplatIndex != InnerMaskElt)
25365 return SDValue();
25366
25367 CombinedMask[i] = InnerMaskElt;
25368 }
25369 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25370 getSplatIndex(CombinedMask) != -1) &&
25371 "Expected a splat mask");
25372
25373 // TODO: The transform may be a win even if the mask is not legal.
25374 EVT VT = OuterShuf->getValueType(0);
25375 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25376 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25377 return SDValue();
25378
25379 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25380 InnerShuf->getOperand(1), CombinedMask);
25381}
25382
25383/// If the shuffle mask is taking exactly one element from the first vector
25384/// operand and passing through all other elements from the second vector
25385/// operand, return the index of the mask element that is choosing an element
25386/// from the first operand. Otherwise, return -1.
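/// For example, with 4-element operands the mask [4,5,1,7] returns 2: lane 2
/// takes element 1 from operand 0 while lanes 0, 1 and 3 pass through the
/// corresponding elements of operand 1.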
25387static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25388  int MaskSize = Mask.size();
25389 int EltFromOp0 = -1;
25390 // TODO: This does not match if there are undef elements in the shuffle mask.
25391 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25392 // removing an instruction (a shuffle), but losing the knowledge that some
25393 // vector lanes are not needed.
25394 for (int i = 0; i != MaskSize; ++i) {
25395 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25396 // We're looking for a shuffle of exactly one element from operand 0.
25397 if (EltFromOp0 != -1)
25398 return -1;
25399 EltFromOp0 = i;
25400 } else if (Mask[i] != i + MaskSize) {
25401 // Nothing from operand 1 can change lanes.
25402 return -1;
25403 }
25404 }
25405 return EltFromOp0;
25406}
25407
25408/// If a shuffle inserts exactly one element from a source vector operand into
25409/// another vector operand and we can access the specified element as a scalar,
25410/// then we can eliminate the shuffle.
25411static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25412                                      SelectionDAG &DAG) {
25413 // First, check if we are taking one element of a vector and shuffling that
25414 // element into another vector.
25415 ArrayRef<int> Mask = Shuf->getMask();
25416 SmallVector<int, 16> CommutedMask(Mask);
25417 SDValue Op0 = Shuf->getOperand(0);
25418 SDValue Op1 = Shuf->getOperand(1);
25419 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25420 if (ShufOp0Index == -1) {
25421 // Commute mask and check again.
25422    ShuffleVectorSDNode::commuteMask(CommutedMask);
25423    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25424 if (ShufOp0Index == -1)
25425 return SDValue();
25426 // Commute operands to match the commuted shuffle mask.
25427 std::swap(Op0, Op1);
25428 Mask = CommutedMask;
25429 }
25430
25431 // The shuffle inserts exactly one element from operand 0 into operand 1.
25432 // Now see if we can access that element as a scalar via a real insert element
25433 // instruction.
25434 // TODO: We can try harder to locate the element as a scalar. Examples: it
25435 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25436 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25437 "Shuffle mask value must be from operand 0");
25438 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25439 return SDValue();
25440
25441 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25442 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25443 return SDValue();
25444
25445 // There's an existing insertelement with constant insertion index, so we
25446 // don't need to check the legality/profitability of a replacement operation
25447 // that differs at most in the constant value. The target should be able to
25448 // lower any of those in a similar way. If not, legalization will expand this
25449 // to a scalar-to-vector plus shuffle.
25450 //
25451 // Note that the shuffle may move the scalar from the position that the insert
25452 // element used. Therefore, our new insert element occurs at the shuffle's
25453 // mask index value, not the insert's index value.
25454 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25455 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25456 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25457 Op1, Op0.getOperand(1), NewInsIndex);
25458}
25459
25460/// If we have a unary shuffle of a shuffle, see if it can be folded away
25461/// completely. This has the potential to lose undef knowledge because the first
25462/// shuffle may not have an undef mask element where the second one does. So
25463/// only call this after doing simplifications based on demanded elements.
25464static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25465  // shuf (shuf0 X, Y, Mask0), undef, Mask
25466 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25467 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25468 return SDValue();
25469
25470 ArrayRef<int> Mask = Shuf->getMask();
25471 ArrayRef<int> Mask0 = Shuf0->getMask();
25472 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25473 // Ignore undef elements.
25474 if (Mask[i] == -1)
25475 continue;
25476 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25477
25478 // Is the element of the shuffle operand chosen by this shuffle the same as
25479 // the element chosen by the shuffle operand itself?
25480 if (Mask0[Mask[i]] != Mask0[i])
25481 return SDValue();
25482 }
25483 // Every element of this shuffle is identical to the result of the previous
25484 // shuffle, so we can replace this value.
25485 return Shuf->getOperand(0);
25486}
25487
25488SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25489 EVT VT = N->getValueType(0);
25490 unsigned NumElts = VT.getVectorNumElements();
25491
25492 SDValue N0 = N->getOperand(0);
25493 SDValue N1 = N->getOperand(1);
25494
25495 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25496
25497 // Canonicalize shuffle undef, undef -> undef
25498 if (N0.isUndef() && N1.isUndef())
25499 return DAG.getUNDEF(VT);
25500
25501 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25502
25503 // Canonicalize shuffle v, v -> v, undef
25504 if (N0 == N1)
25505 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25506 createUnaryMask(SVN->getMask(), NumElts));
25507
25508 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25509 if (N0.isUndef())
25510 return DAG.getCommutedVectorShuffle(*SVN);
25511
25512 // Remove references to rhs if it is undef
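  // e.g. with 4 elements, mask [0,5,2,7] becomes [0,u,2,u] since indices that
  // referred to the undef RHS carry no information.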
25513 if (N1.isUndef()) {
25514 bool Changed = false;
25515 SmallVector<int, 8> NewMask;
25516 for (unsigned i = 0; i != NumElts; ++i) {
25517 int Idx = SVN->getMaskElt(i);
25518 if (Idx >= (int)NumElts) {
25519 Idx = -1;
25520 Changed = true;
25521 }
25522 NewMask.push_back(Idx);
25523 }
25524 if (Changed)
25525 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25526 }
25527
25528 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25529 return InsElt;
25530
25531 // A shuffle of a single vector that is a splatted value can always be folded.
25532 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25533 return V;
25534
25535 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25536 return V;
25537
25538 // If it is a splat, check if the argument vector is another splat or a
25539 // build_vector.
25540 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25541 int SplatIndex = SVN->getSplatIndex();
25542 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25543 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25544 // splat (vector_bo L, R), Index -->
25545 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25546 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25547 SDLoc DL(N);
25548 EVT EltVT = VT.getScalarType();
25549 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25550 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25551 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25552 SDValue NewBO =
25553 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25554 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25555      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25556      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25557 }
25558
25559 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25560 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25561 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25562 N0.hasOneUse()) {
25563 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25564 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25565
25566      if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25567        if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25568 if (Idx->getAPIntValue() == SplatIndex)
25569 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25570
25571 // Look through a bitcast if LE and splatting lane 0, through to a
25572 // scalar_to_vector or a build_vector.
25573 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25574 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25575          (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25576           N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25577        EVT N00VT = N0.getOperand(0).getValueType();
25578 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25579 VT.isInteger() && N00VT.isInteger()) {
25580 EVT InVT =
25581            TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25582        SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25583                                        SDLoc(N), InVT);
25584 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25585 }
25586 }
25587 }
25588
25589 // If this is a bit convert that changes the element type of the vector but
25590 // not the number of vector elements, look through it. Be careful not to
25591  // look through conversions that change things like v4f32 to v2f64.
25592 SDNode *V = N0.getNode();
25593 if (V->getOpcode() == ISD::BITCAST) {
25594 SDValue ConvInput = V->getOperand(0);
25595 if (ConvInput.getValueType().isVector() &&
25596 ConvInput.getValueType().getVectorNumElements() == NumElts)
25597 V = ConvInput.getNode();
25598 }
25599
25600 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25601 assert(V->getNumOperands() == NumElts &&
25602 "BUILD_VECTOR has wrong number of operands");
25603 SDValue Base;
25604 bool AllSame = true;
25605 for (unsigned i = 0; i != NumElts; ++i) {
25606 if (!V->getOperand(i).isUndef()) {
25607 Base = V->getOperand(i);
25608 break;
25609 }
25610 }
25611 // Splat of <u, u, u, u>, return <u, u, u, u>
25612 if (!Base.getNode())
25613 return N0;
25614 for (unsigned i = 0; i != NumElts; ++i) {
25615 if (V->getOperand(i) != Base) {
25616 AllSame = false;
25617 break;
25618 }
25619 }
25620 // Splat of <x, x, x, x>, return <x, x, x, x>
25621 if (AllSame)
25622 return N0;
25623
25624 // Canonicalize any other splat as a build_vector.
25625 SDValue Splatted = V->getOperand(SplatIndex);
25626 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25627 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25628
25629 // We may have jumped through bitcasts, so the type of the
25630 // BUILD_VECTOR may not match the type of the shuffle.
25631 if (V->getValueType(0) != VT)
25632 NewBV = DAG.getBitcast(VT, NewBV);
25633 return NewBV;
25634 }
25635 }
25636
25637 // Simplify source operands based on shuffle mask.
25638  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25639    return SDValue(N, 0);
25640
25641 // This is intentionally placed after demanded elements simplification because
25642 // it could eliminate knowledge of undef elements created by this shuffle.
25643 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25644 return ShufOp;
25645
25646 // Match shuffles that can be converted to any_vector_extend_in_reg.
25647 if (SDValue V =
25648 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25649 return V;
25650
25651 // Combine "truncate_vector_in_reg" style shuffles.
25652 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25653 return V;
25654
25655 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25656 Level < AfterLegalizeVectorOps &&
25657 (N1.isUndef() ||
25658 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25659 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25660 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25661 return V;
25662 }
25663
25664 // A shuffle of a concat of the same narrow vector can be reduced to use
25665 // only low-half elements of a concat with undef:
25666 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
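  // e.g. if X is v2f32, mask [2,3,0,1] over concat(X,X) is rewritten as
  // [0,1,0,1] so that only the low half of the new concat is referenced.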
25667 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25668 N0.getNumOperands() == 2 &&
25669 N0.getOperand(0) == N0.getOperand(1)) {
25670 int HalfNumElts = (int)NumElts / 2;
25671 SmallVector<int, 8> NewMask;
25672 for (unsigned i = 0; i != NumElts; ++i) {
25673 int Idx = SVN->getMaskElt(i);
25674 if (Idx >= HalfNumElts) {
25675 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25676 Idx -= HalfNumElts;
25677 }
25678 NewMask.push_back(Idx);
25679 }
25680 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25681 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25682 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25683 N0.getOperand(0), UndefVec);
25684 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25685 }
25686 }
25687
25688 // See if we can replace a shuffle with an insert_subvector.
25689 // e.g. v2i32 into v8i32:
25690 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25691 // --> insert_subvector(lhs,rhs1,4).
25692 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25693      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25694    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25695 // Ensure RHS subvectors are legal.
25696 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25697 EVT SubVT = RHS.getOperand(0).getValueType();
25698 int NumSubVecs = RHS.getNumOperands();
25699 int NumSubElts = SubVT.getVectorNumElements();
25700 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25701 if (!TLI.isTypeLegal(SubVT))
25702 return SDValue();
25703
25704      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25705 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25706 return SDValue();
25707
25708 // Search [NumSubElts] spans for RHS sequence.
25709 // TODO: Can we avoid nested loops to increase performance?
25710 SmallVector<int> InsertionMask(NumElts);
25711 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25712 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25713 // Reset mask to identity.
25714 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25715
25716 // Add subvector insertion.
25717 std::iota(InsertionMask.begin() + SubIdx,
25718 InsertionMask.begin() + SubIdx + NumSubElts,
25719 NumElts + (SubVec * NumSubElts));
25720
25721 // See if the shuffle mask matches the reference insertion mask.
25722 bool MatchingShuffle = true;
25723 for (int i = 0; i != (int)NumElts; ++i) {
25724 int ExpectIdx = InsertionMask[i];
25725 int ActualIdx = Mask[i];
25726 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25727 MatchingShuffle = false;
25728 break;
25729 }
25730 }
25731
25732 if (MatchingShuffle)
25733 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25734 RHS.getOperand(SubVec),
25735 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25736 }
25737 }
25738 return SDValue();
25739 };
25740 ArrayRef<int> Mask = SVN->getMask();
25741 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25742 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25743 return InsertN1;
25744 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25745 SmallVector<int> CommuteMask(Mask);
25746      ShuffleVectorSDNode::commuteMask(CommuteMask);
25747      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25748 return InsertN0;
25749 }
25750 }
25751
25752  // If we're not performing a select/blend shuffle, see if we can convert the
25753  // shuffle into an AND node, where all the out-of-lane elements are known zero.
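  // e.g. shuffle(X, Y, [0,4,2,7]) where lanes 0 and 3 of Y are known to be
  // zero is equivalent to AND(X, <-1,0,-1,0>), assuming the AND (or the
  // clear-mask shuffle) is legal on the equivalent integer type.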
25754 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25755 bool IsInLaneMask = true;
25756 ArrayRef<int> Mask = SVN->getMask();
25757 SmallVector<int, 16> ClearMask(NumElts, -1);
25758 APInt DemandedLHS = APInt::getZero(NumElts);
25759 APInt DemandedRHS = APInt::getZero(NumElts);
25760 for (int I = 0; I != (int)NumElts; ++I) {
25761 int M = Mask[I];
25762 if (M < 0)
25763 continue;
25764 ClearMask[I] = M == I ? I : (I + NumElts);
25765 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25766 if (M != I) {
25767 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25768 Demanded.setBit(M % NumElts);
25769 }
25770 }
25771 // TODO: Should we try to mask with N1 as well?
25772 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25773 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25774 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25775 SDLoc DL(N);
25776      EVT IntVT = VT.changeVectorElementTypeToInteger();
25777      EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25778      // Transform the type to a legal type so that the buildvector constant
25779      // elements are not illegal. Make sure that the result is no smaller than
25780      // the original element type, in case the value is split in two (e.g. i64->i32).
25781 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25782 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25783 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25784 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25785 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25786 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25787 for (int I = 0; I != (int)NumElts; ++I)
25788 if (0 <= Mask[I])
25789 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25790
25791 // See if a clear mask is legal instead of going via
25792 // XformToShuffleWithZero which loses UNDEF mask elements.
25793 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25794 return DAG.getBitcast(
25795 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25796 DAG.getConstant(0, DL, IntVT), ClearMask));
25797
25798 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25799 return DAG.getBitcast(
25800 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25801 DAG.getBuildVector(IntVT, DL, AndMask)));
25802 }
25803 }
25804 }
25805
25806 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25807 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25808 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25809 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25810 return Res;
25811
25812 // If this shuffle only has a single input that is a bitcasted shuffle,
25813 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25814 // back to their original types.
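  // e.g. a v4i32 shuffle with mask [2,3,2,3] of bitcast(v2i64 shuffle(X, Y,
  // [1,0])) can, after scaling both masks to i32 lanes, be merged into a
  // single shuffle(bitcast(X), bitcast(Y), [0,1,0,1]).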
25815 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25816 N1.isUndef() && Level < AfterLegalizeVectorOps &&
25817 TLI.isTypeLegal(VT)) {
25818
25819    SDValue BC0 = peekThroughOneUseBitcasts(N0);
25820    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25821 EVT SVT = VT.getScalarType();
25822 EVT InnerVT = BC0->getValueType(0);
25823 EVT InnerSVT = InnerVT.getScalarType();
25824
25825 // Determine which shuffle works with the smaller scalar type.
25826 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25827 EVT ScaleSVT = ScaleVT.getScalarType();
25828
25829 if (TLI.isTypeLegal(ScaleVT) &&
25830 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25831 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25832 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25833 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25834
25835 // Scale the shuffle masks to the smaller scalar type.
25836 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25837 SmallVector<int, 8> InnerMask;
25838 SmallVector<int, 8> OuterMask;
25839 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25840 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25841
25842 // Merge the shuffle masks.
25843 SmallVector<int, 8> NewMask;
25844 for (int M : OuterMask)
25845 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25846
25847 // Test for shuffle mask legality over both commutations.
25848 SDValue SV0 = BC0->getOperand(0);
25849 SDValue SV1 = BC0->getOperand(1);
25850 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25851 if (!LegalMask) {
25852 std::swap(SV0, SV1);
25853            ShuffleVectorSDNode::commuteMask(NewMask);
25854            LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25855 }
25856
25857 if (LegalMask) {
25858 SV0 = DAG.getBitcast(ScaleVT, SV0);
25859 SV1 = DAG.getBitcast(ScaleVT, SV1);
25860 return DAG.getBitcast(
25861 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25862 }
25863 }
25864 }
25865 }
25866
25867 // Match shuffles of bitcasts, so long as the mask can be treated as the
25868 // larger type.
25869 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25870 return V;
25871
25872 // Compute the combined shuffle mask for a shuffle with SV0 as the first
25873 // operand, and SV1 as the second operand.
25874 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25875 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
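  // e.g. merging shuffle(shuffle(A, B, [0,4,1,5]), C, [0,2,4,6]) peeks through
  // the inner mask and yields SV0 = A, SV1 = C with Mask = [0,1,4,6].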
25876 auto MergeInnerShuffle =
25877 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25878 ShuffleVectorSDNode *OtherSVN, SDValue N1,
25879 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25880 SmallVectorImpl<int> &Mask) -> bool {
25881 // Don't try to fold splats; they're likely to simplify somehow, or they
25882 // might be free.
25883 if (OtherSVN->isSplat())
25884 return false;
25885
25886 SV0 = SV1 = SDValue();
25887 Mask.clear();
25888
25889 for (unsigned i = 0; i != NumElts; ++i) {
25890 int Idx = SVN->getMaskElt(i);
25891 if (Idx < 0) {
25892 // Propagate Undef.
25893 Mask.push_back(Idx);
25894 continue;
25895 }
25896
25897 if (Commute)
25898 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25899
25900 SDValue CurrentVec;
25901 if (Idx < (int)NumElts) {
25902 // This shuffle index refers to the inner shuffle N0. Lookup the inner
25903 // shuffle mask to identify which vector is actually referenced.
25904 Idx = OtherSVN->getMaskElt(Idx);
25905 if (Idx < 0) {
25906 // Propagate Undef.
25907 Mask.push_back(Idx);
25908 continue;
25909 }
25910 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25911 : OtherSVN->getOperand(1);
25912 } else {
25913 // This shuffle index references an element within N1.
25914 CurrentVec = N1;
25915 }
25916
25917 // Simple case where 'CurrentVec' is UNDEF.
25918 if (CurrentVec.isUndef()) {
25919 Mask.push_back(-1);
25920 continue;
25921 }
25922
25923 // Canonicalize the shuffle index. We don't know yet if CurrentVec
25924 // will be the first or second operand of the combined shuffle.
25925 Idx = Idx % NumElts;
25926 if (!SV0.getNode() || SV0 == CurrentVec) {
25927 // Ok. CurrentVec is the left hand side.
25928 // Update the mask accordingly.
25929 SV0 = CurrentVec;
25930 Mask.push_back(Idx);
25931 continue;
25932 }
25933 if (!SV1.getNode() || SV1 == CurrentVec) {
25934 // Ok. CurrentVec is the right hand side.
25935 // Update the mask accordingly.
25936 SV1 = CurrentVec;
25937 Mask.push_back(Idx + NumElts);
25938 continue;
25939 }
25940
25941 // Last chance - see if the vector is another shuffle and if it
25942 // uses one of the existing candidate shuffle ops.
25943 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25944 int InnerIdx = CurrentSVN->getMaskElt(Idx);
25945 if (InnerIdx < 0) {
25946 Mask.push_back(-1);
25947 continue;
25948 }
25949 SDValue InnerVec = (InnerIdx < (int)NumElts)
25950 ? CurrentSVN->getOperand(0)
25951 : CurrentSVN->getOperand(1);
25952 if (InnerVec.isUndef()) {
25953 Mask.push_back(-1);
25954 continue;
25955 }
25956 InnerIdx %= NumElts;
25957 if (InnerVec == SV0) {
25958 Mask.push_back(InnerIdx);
25959 continue;
25960 }
25961 if (InnerVec == SV1) {
25962 Mask.push_back(InnerIdx + NumElts);
25963 continue;
25964 }
25965 }
25966
25967 // Bail out if we cannot convert the shuffle pair into a single shuffle.
25968 return false;
25969 }
25970
25971 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25972 return true;
25973
25974 // Avoid introducing shuffles with illegal mask.
25975 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25976 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25977 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25978 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25979 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25980 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25981 if (TLI.isShuffleMaskLegal(Mask, VT))
25982 return true;
25983
25984 std::swap(SV0, SV1);
25985    ShuffleVectorSDNode::commuteMask(Mask);
25986    return TLI.isShuffleMaskLegal(Mask, VT);
25987 };
25988
25989 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25990 // Canonicalize shuffles according to rules:
25991 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25992 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25993 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25994 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25995        N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25996      // The incoming shuffle must be of the same type as the result of the
25997 // current shuffle.
25998 assert(N1->getOperand(0).getValueType() == VT &&
25999 "Shuffle types don't match");
26000
26001 SDValue SV0 = N1->getOperand(0);
26002 SDValue SV1 = N1->getOperand(1);
26003 bool HasSameOp0 = N0 == SV0;
26004 bool IsSV1Undef = SV1.isUndef();
26005 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26006 // Commute the operands of this shuffle so merging below will trigger.
26007 return DAG.getCommutedVectorShuffle(*SVN);
26008 }
26009
26010 // Canonicalize splat shuffles to the RHS to improve merging below.
26011 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26012 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26013 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26014 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26015 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26016 return DAG.getCommutedVectorShuffle(*SVN);
26017 }
26018
26019 // Try to fold according to rules:
26020 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26021 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26022 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26023 // Don't try to fold shuffles with illegal type.
26024 // Only fold if this shuffle is the only user of the other shuffle.
26025    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26026 for (int i = 0; i != 2; ++i) {
26027 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26028 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26029 // The incoming shuffle must be of the same type as the result of the
26030 // current shuffle.
26031 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26032 assert(OtherSV->getOperand(0).getValueType() == VT &&
26033 "Shuffle types don't match");
26034
26035 SDValue SV0, SV1;
26036        SmallVector<int, 4> Mask;
26037        if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26038 SV0, SV1, Mask)) {
26039 // Check if all indices in Mask are Undef. In case, propagate Undef.
26040 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26041 return DAG.getUNDEF(VT);
26042
26043 return DAG.getVectorShuffle(VT, SDLoc(N),
26044 SV0 ? SV0 : DAG.getUNDEF(VT),
26045 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26046 }
26047 }
26048 }
26049
26050  // Merge shuffles through binops if we are able to merge them with at least
26051  // one other shuffle.
26052 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26053 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26054 unsigned SrcOpcode = N0.getOpcode();
26055 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26056 (N1.isUndef() ||
26057 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26058 // Get binop source ops, or just pass on the undef.
26059 SDValue Op00 = N0.getOperand(0);
26060 SDValue Op01 = N0.getOperand(1);
26061 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26062 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26063 // TODO: We might be able to relax the VT check but we don't currently
26064 // have any isBinOp() that has different result/ops VTs so play safe until
26065 // we have test coverage.
26066 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26067 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26068 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26069 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26070 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26071 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26072 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26073 SmallVectorImpl<int> &Mask, bool LeftOp,
26074 bool Commute) {
26075 SDValue InnerN = Commute ? N1 : N0;
26076 SDValue Op0 = LeftOp ? Op00 : Op01;
26077 SDValue Op1 = LeftOp ? Op10 : Op11;
26078 if (Commute)
26079 std::swap(Op0, Op1);
26080 // Only accept the merged shuffle if we don't introduce undef elements,
26081 // or the inner shuffle already contained undef elements.
26082 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26083 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26084 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26085 Mask) &&
26086 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26087 llvm::none_of(Mask, [](int M) { return M < 0; }));
26088 };
26089
26090 // Ensure we don't increase the number of shuffles - we must merge a
26091 // shuffle from at least one of the LHS and RHS ops.
26092 bool MergedLeft = false;
26093 SDValue LeftSV0, LeftSV1;
26094 SmallVector<int, 4> LeftMask;
26095 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26096 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26097 MergedLeft = true;
26098 } else {
26099 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26100 LeftSV0 = Op00, LeftSV1 = Op10;
26101 }
26102
26103 bool MergedRight = false;
26104 SDValue RightSV0, RightSV1;
26105 SmallVector<int, 4> RightMask;
26106 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26107 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26108 MergedRight = true;
26109 } else {
26110 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26111 RightSV0 = Op01, RightSV1 = Op11;
26112 }
26113
26114 if (MergedLeft || MergedRight) {
26115 SDLoc DL(N);
26116          SDValue LHS = DAG.getVectorShuffle(
26117              VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26118              LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26119          SDValue RHS = DAG.getVectorShuffle(
26120              VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26121              RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26122 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26123 }
26124 }
26125 }
26126 }
26127
26128 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26129 return V;
26130
26131 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26132 // Perform this really late, because it could eliminate knowledge
26133 // of undef elements created by this shuffle.
26134 if (Level < AfterLegalizeTypes)
26135 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26136 LegalOperations))
26137 return V;
26138
26139 return SDValue();
26140}
26141
26142SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26143 EVT VT = N->getValueType(0);
26144 if (!VT.isFixedLengthVector())
26145 return SDValue();
26146
26147 // Try to convert a scalar binop with an extracted vector element to a vector
26148 // binop. This is intended to reduce potentially expensive register moves.
26149 // TODO: Check if both operands are extracted.
26150  // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26151 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26152 SDValue Scalar = N->getOperand(0);
26153 unsigned Opcode = Scalar.getOpcode();
26154 EVT VecEltVT = VT.getScalarType();
26155 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26156 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26157 Scalar.getOperand(0).getValueType() == VecEltVT &&
26158 Scalar.getOperand(1).getValueType() == VecEltVT &&
26159 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26160 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26161 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26162 // Match an extract element and get a shuffle mask equivalent.
26163 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26164
26165 for (int i : {0, 1}) {
26166 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26167 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26168 SDValue EE = Scalar.getOperand(i);
26169 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26170 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26171 EE.getOperand(0).getValueType() == VT &&
26172 isa<ConstantSDNode>(EE.getOperand(1))) {
26173 // Mask = {ExtractIndex, undef, undef....}
26174 ShufMask[0] = EE.getConstantOperandVal(1);
26175 // Make sure the shuffle is legal if we are crossing lanes.
26176 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26177 SDLoc DL(N);
26178 SDValue V[] = {EE.getOperand(0),
26179 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26180 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26181 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26182 ShufMask);
26183 }
26184 }
26185 }
26186 }
26187
26188 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26189 // with a VECTOR_SHUFFLE and possible truncate.
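  // e.g. scalar_to_vector(extract_vector_elt(v4i32 V, 2)) becomes
  // shuffle(V, undef, [2,u,u,u]) when such a mask is legal for the target.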
26190 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26191 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26192 return SDValue();
26193
26194 // If we have an implicit truncate, truncate here if it is legal.
26195 if (VecEltVT != Scalar.getValueType() &&
26196 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26197 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26198 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26199 }
26200
26201 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26202 if (!ExtIndexC)
26203 return SDValue();
26204
26205 SDValue SrcVec = Scalar.getOperand(0);
26206 EVT SrcVT = SrcVec.getValueType();
26207 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26208 unsigned VTNumElts = VT.getVectorNumElements();
26209 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26210 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26211 SmallVector<int, 8> Mask(SrcNumElts, -1);
26212 Mask[0] = ExtIndexC->getZExtValue();
26213 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26214 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26215 if (!LegalShuffle)
26216 return SDValue();
26217
26218 // If the initial vector is the same size, the shuffle is the result.
26219 if (VT == SrcVT)
26220 return LegalShuffle;
26221
26222 // If not, shorten the shuffled vector.
26223 if (VTNumElts != SrcNumElts) {
26224 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26225 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26226 SrcVT.getVectorElementType(), VTNumElts);
26227 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26228 ZeroIdx);
26229 }
26230 }
26231
26232 return SDValue();
26233}
26234
26235SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26236 EVT VT = N->getValueType(0);
26237 SDValue N0 = N->getOperand(0);
26238 SDValue N1 = N->getOperand(1);
26239 SDValue N2 = N->getOperand(2);
26240 uint64_t InsIdx = N->getConstantOperandVal(2);
26241
26242 // If inserting an UNDEF, just return the original vector.
26243 if (N1.isUndef())
26244 return N0;
26245
26246 // If this is an insert of an extracted vector into an undef vector, we can
26247 // just use the input to the extract if the types match, and can simplify
26248 // in some cases even if they don't.
26249 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26250 N1.getOperand(1) == N2) {
26251 EVT SrcVT = N1.getOperand(0).getValueType();
26252 if (SrcVT == VT)
26253 return N1.getOperand(0);
26254 // TODO: To remove the zero check, need to adjust the offset to
26255 // a multiple of the new src type.
26256 if (isNullConstant(N2) &&
26257 VT.isScalableVector() == SrcVT.isScalableVector()) {
26258      if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26259        return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26260                           VT, N0, N1.getOperand(0), N2);
26261      else
26262        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26263                           VT, N1.getOperand(0), N2);
26264 }
26265 }
26266
26267 // Handle case where we've ended up inserting back into the source vector
26268 // we extracted the subvector from.
26269 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26270 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26271 N1.getOperand(1) == N2)
26272 return N0;
26273
26274 // Simplify scalar inserts into an undef vector:
26275 // insert_subvector undef, (splat X), N2 -> splat X
26276 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26277 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26278 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26279
26280 // If we are inserting a bitcast value into an undef, with the same
26281 // number of elements, just use the bitcast input of the extract.
26282 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26283 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26284 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26285      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26286      N1.getOperand(0).getOperand(1) == N2 &&
26287      N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26288          VT.getVectorElementCount() &&
26289      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26290          VT.getSizeInBits()) {
26291 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26292 }
26293
26294  // If both N0 and N1 are bitcast values on which insert_subvector
26295  // would make sense, pull the bitcast through.
26296 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26297 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26298 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26299 SDValue CN0 = N0.getOperand(0);
26300 SDValue CN1 = N1.getOperand(0);
26301 EVT CN0VT = CN0.getValueType();
26302 EVT CN1VT = CN1.getValueType();
26303 if (CN0VT.isVector() && CN1VT.isVector() &&
26304 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26305        CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26306      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26307 CN0.getValueType(), CN0, CN1, N2);
26308 return DAG.getBitcast(VT, NewINSERT);
26309 }
26310 }
26311
26312 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26313 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26314 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26315 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26316 N0.getOperand(1).getValueType() == N1.getValueType() &&
26317 N0.getOperand(2) == N2)
26318 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26319 N1, N2);
26320
26321 // Eliminate an intermediate insert into an undef vector:
26322 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26323 // insert_subvector undef, X, 0
26324 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26325 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26326 isNullConstant(N2))
26327 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26328 N1.getOperand(1), N2);
26329
26330 // Push subvector bitcasts to the output, adjusting the index as we go.
26331 // insert_subvector(bitcast(v), bitcast(s), c1)
26332 // -> bitcast(insert_subvector(v, s, c2))
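  // e.g. insert_subvector(bitcast(v4i64 V to v8i32), bitcast(v2i64 S to v4i32), 4)
  // becomes bitcast(insert_subvector(V, S, 2)), scaling the index by the
  // element-size ratio, provided a v4i64 insert_subvector is available.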
26333 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26334 N1.getOpcode() == ISD::BITCAST) {
26335 SDValue N0Src = peekThroughBitcasts(N0);
26336 SDValue N1Src = peekThroughBitcasts(N1);
26337 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26338 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26339 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26340 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26341 EVT NewVT;
26342 SDLoc DL(N);
26343 SDValue NewIdx;
26344 LLVMContext &Ctx = *DAG.getContext();
26345 ElementCount NumElts = VT.getVectorElementCount();
26346 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26347 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26348 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26349 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26350 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26351 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26352 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26353 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26354 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26355 NumElts.divideCoefficientBy(Scale));
26356 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26357 }
26358 }
26359 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26360 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26361 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26362 return DAG.getBitcast(VT, Res);
26363 }
26364 }
26365 }
26366
26367 // Canonicalize insert_subvector dag nodes.
26368 // Example:
26369 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26370 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26371 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26372 N1.getValueType() == N0.getOperand(1).getValueType()) {
26373 unsigned OtherIdx = N0.getConstantOperandVal(2);
26374 if (InsIdx < OtherIdx) {
26375 // Swap nodes.
26376 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26377 N0.getOperand(0), N1, N2);
26378 AddToWorklist(NewOp.getNode());
26379 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26380 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26381 }
26382 }
26383
26384 // If the input vector is a concatenation, and the insert replaces
26385 // one of the pieces, we can optimize into a single concat_vectors.
26386 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26387 N0.getOperand(0).getValueType() == N1.getValueType() &&
26390 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26391 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26392 Ops[InsIdx / Factor] = N1;
26393 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26394 }
26395
26396 // Simplify source operands based on insertion.
26397  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26398    return SDValue(N, 0);
26399
26400 return SDValue();
26401}
26402
26403SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26404 SDValue N0 = N->getOperand(0);
26405
26406 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26407 if (N0->getOpcode() == ISD::FP16_TO_FP)
26408 return N0->getOperand(0);
26409
26410 return SDValue();
26411}
26412
26413SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26414 auto Op = N->getOpcode();
26415  assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
26416         "opcode should be FP16_TO_FP or BF16_TO_FP.");
26417 SDValue N0 = N->getOperand(0);
26418
26419 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26420 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26421 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26422    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26423    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26424 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26425 }
26426 }
26427
26428 return SDValue();
26429}
26430
26431SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26432 SDValue N0 = N->getOperand(0);
26433
26434 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26435 if (N0->getOpcode() == ISD::BF16_TO_FP)
26436 return N0->getOperand(0);
26437
26438 return SDValue();
26439}
26440
26441SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26442 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26443 return visitFP16_TO_FP(N);
26444}
26445
26446SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26447 SDValue N0 = N->getOperand(0);
26448 EVT VT = N0.getValueType();
26449 unsigned Opcode = N->getOpcode();
26450
26451 // VECREDUCE over 1-element vector is just an extract.
26452 if (VT.getVectorElementCount().isScalar()) {
26453 SDLoc dl(N);
26454 SDValue Res =
26455        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26456                    DAG.getVectorIdxConstant(0, dl));
26457 if (Res.getValueType() != N->getValueType(0))
26458 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26459 return Res;
26460 }
26461
26462  // On a boolean vector an and/or reduction is the same as a umin/umax
26463 // reduction. Convert them if the latter is legal while the former isn't.
26464 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26465 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26466                             ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26467    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26468 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26469        DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26470      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26471 }
26472
26473 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26474 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26475 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26476 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26477 SDValue Vec = N0.getOperand(0);
26478 SDValue Subvec = N0.getOperand(1);
26479 if ((Opcode == ISD::VECREDUCE_OR &&
26480 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26481 (Opcode == ISD::VECREDUCE_AND &&
26482 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26483 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26484 }
26485
26486 return SDValue();
26487}
26488
26489SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26490 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26491
26492 // FSUB -> FMA combines:
26493 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26494 AddToWorklist(Fused.getNode());
26495 return Fused;
26496 }
26497 return SDValue();
26498}
26499
26500SDValue DAGCombiner::visitVPOp(SDNode *N) {
26501
26502 if (N->getOpcode() == ISD::VP_GATHER)
26503 if (SDValue SD = visitVPGATHER(N))
26504 return SD;
26505
26506 if (N->getOpcode() == ISD::VP_SCATTER)
26507 if (SDValue SD = visitVPSCATTER(N))
26508 return SD;
26509
26510 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26511 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26512 return SD;
26513
26514 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26515 if (SDValue SD = visitVP_STRIDED_STORE(N))
26516 return SD;
26517
26518 // VP operations in which all vector elements are disabled - either by
26519 // determining that the mask is all false or that the EVL is 0 - can be
26520 // eliminated.
26521 bool AreAllEltsDisabled = false;
26522 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26523 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26524 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26525 AreAllEltsDisabled |=
26526 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26527
26528 // This is the only generic VP combine we support for now.
26529 if (!AreAllEltsDisabled) {
26530 switch (N->getOpcode()) {
26531 case ISD::VP_FADD:
26532 return visitVP_FADD(N);
26533 case ISD::VP_FSUB:
26534 return visitVP_FSUB(N);
26535 case ISD::VP_FMA:
26536 return visitFMA<VPMatchContext>(N);
26537 case ISD::VP_SELECT:
26538 return visitVP_SELECT(N);
26539 }
26540 return SDValue();
26541 }
26542
26543 // Binary operations can be replaced by UNDEF.
26544 if (ISD::isVPBinaryOp(N->getOpcode()))
26545 return DAG.getUNDEF(N->getValueType(0));
26546
26547 // VP Memory operations can be replaced by either the chain (stores) or the
26548 // chain + undef (loads).
26549 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26550 if (MemSD->writeMem())
26551 return MemSD->getChain();
26552 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26553 }
26554
26555 // Reduction operations return the start operand when no elements are active.
26556 if (ISD::isVPReduction(N->getOpcode()))
26557 return N->getOperand(0);
26558
26559 return SDValue();
26560}
26561
26562SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26563 SDValue Chain = N->getOperand(0);
26564 SDValue Ptr = N->getOperand(1);
26565 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26566
26567 // Check if the memory, where FP state is written to, is used only in a single
26568 // load operation.
26569 LoadSDNode *LdNode = nullptr;
26570 for (auto *U : Ptr->uses()) {
26571 if (U == N)
26572 continue;
26573 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26574 if (LdNode && LdNode != Ld)
26575 return SDValue();
26576 LdNode = Ld;
26577 continue;
26578 }
26579 return SDValue();
26580 }
26581 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26582 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26583      !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26584    return SDValue();
26585
26586 // Check if the loaded value is used only in a store operation.
26587 StoreSDNode *StNode = nullptr;
26588 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26589 SDUse &U = I.getUse();
26590 if (U.getResNo() == 0) {
26591 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26592 if (StNode)
26593 return SDValue();
26594 StNode = St;
26595 } else {
26596 return SDValue();
26597 }
26598 }
26599 }
26600 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26601 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26602 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26603 return SDValue();
26604
26605 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26606 // environment.
26607 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26608 StNode->getMemOperand());
26609 CombineTo(StNode, Res, false);
26610 return Res;
26611}
26612
26613SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26614 SDValue Chain = N->getOperand(0);
26615 SDValue Ptr = N->getOperand(1);
26616 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26617
26618  // Check if the address of the FP state is also used only by a single store.
26619 StoreSDNode *StNode = nullptr;
26620 for (auto *U : Ptr->uses()) {
26621 if (U == N)
26622 continue;
26623 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26624 if (StNode && StNode != St)
26625 return SDValue();
26626 StNode = St;
26627 continue;
26628 }
26629 return SDValue();
26630 }
26631 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26632 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26633 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26634 return SDValue();
26635
26636 // Check if the stored value is loaded from some location and the loaded
26637 // value is used only in the store operation.
26638 SDValue StValue = StNode->getValue();
26639 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26640 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26641 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26642 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26643 return SDValue();
26644
26645 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26646 // environment.
26647 SDValue Res =
26648 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26649 LdNode->getMemOperand());
26650 return Res;
26651}
26652
26653/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26654/// with the destination vector and a zero vector.
26655/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26656/// vector_shuffle V, Zero, <0, 4, 2, 4>
26657SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26658 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26659
26660 EVT VT = N->getValueType(0);
26661 SDValue LHS = N->getOperand(0);
26662 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26663 SDLoc DL(N);
26664
26665 // Make sure we're not running after operation legalization where it
26666 // may have custom lowered the vector shuffles.
26667 if (LegalOperations)
26668 return SDValue();
26669
26670 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26671 return SDValue();
26672
26673 EVT RVT = RHS.getValueType();
26674 unsigned NumElts = RHS.getNumOperands();
26675
26676 // Attempt to create a valid clear mask, splitting the mask into
26677 // sub elements and checking to see if each is
26678 // all zeros or all ones - suitable for shuffle masking.
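  // e.g. on a little-endian target, AND of a v2i64 value with
  // <0x00000000FFFFFFFF, 0xFFFFFFFF00000000> splits at i32 granularity into
  // the clear mask <0,5,6,3> over (bitcast LHS, zero), keeping i32 sub-lanes
  // 0 and 3 and zeroing sub-lanes 1 and 2.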
26679 auto BuildClearMask = [&](int Split) {
26680 int NumSubElts = NumElts * Split;
26681 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26682
26683 SmallVector<int, 8> Indices;
26684 for (int i = 0; i != NumSubElts; ++i) {
26685 int EltIdx = i / Split;
26686 int SubIdx = i % Split;
26687 SDValue Elt = RHS.getOperand(EltIdx);
26688 // X & undef --> 0 (not undef). So this lane must be converted to choose
26689 // from the zero constant vector (same as if the element had all 0-bits).
26690 if (Elt.isUndef()) {
26691 Indices.push_back(i + NumSubElts);
26692 continue;
26693 }
26694
26695 APInt Bits;
26696 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26697 Bits = Cst->getAPIntValue();
26698 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26699 Bits = CstFP->getValueAPF().bitcastToAPInt();
26700 else
26701 return SDValue();
26702
26703 // Extract the sub element from the constant bit mask.
26704 if (DAG.getDataLayout().isBigEndian())
26705 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26706 else
26707 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26708
26709 if (Bits.isAllOnes())
26710 Indices.push_back(i);
26711 else if (Bits == 0)
26712 Indices.push_back(i + NumSubElts);
26713 else
26714 return SDValue();
26715 }
26716
26717 // Let's see if the target supports this vector_shuffle.
26718 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26719 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26720 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26721 return SDValue();
26722
26723 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26724 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26725 DAG.getBitcast(ClearVT, LHS),
26726 Zero, Indices));
26727 };
26728
26729 // Determine maximum split level (byte level masking).
26730 int MaxSplit = 1;
26731 if (RVT.getScalarSizeInBits() % 8 == 0)
26732 MaxSplit = RVT.getScalarSizeInBits() / 8;
26733
26734 for (int Split = 1; Split <= MaxSplit; ++Split)
26735 if (RVT.getScalarSizeInBits() % Split == 0)
26736 if (SDValue S = BuildClearMask(Split))
26737 return S;
26738
26739 return SDValue();
26740}
26741
26742/// If a vector binop is performed on splat values, it may be profitable to
26743/// extract, scalarize, and insert/splat.
26744static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26745                                      const SDLoc &DL) {
26746 SDValue N0 = N->getOperand(0);
26747 SDValue N1 = N->getOperand(1);
26748 unsigned Opcode = N->getOpcode();
26749 EVT VT = N->getValueType(0);
26750 EVT EltVT = VT.getVectorElementType();
26751 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26752
26753 // TODO: Remove/replace the extract cost check? If the elements are available
26754 // as scalars, then there may be no extract cost. Should we ask if
26755 // inserting a scalar back into a vector is cheap instead?
26756 int Index0, Index1;
26757 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26758 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26759 // Extract element from splat_vector should be free.
26760 // TODO: use DAG.isSplatValue instead?
26761 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26762                           N1.getOpcode() == ISD::SPLAT_VECTOR;
26763  if (!Src0 || !Src1 || Index0 != Index1 ||
26764 Src0.getValueType().getVectorElementType() != EltVT ||
26765 Src1.getValueType().getVectorElementType() != EltVT ||
26766 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26767 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26768 return SDValue();
26769
26770 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26771 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26772 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26773 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26774
26775 // If all lanes but 1 are undefined, no need to splat the scalar result.
26776 // TODO: Keep track of undefs and use that info in the general case.
26777 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26778 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26779 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26780 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26781 // build_vec ..undef, (bo X, Y), undef...
26782    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26783    Ops[Index0] = ScalarBO;
26784 return DAG.getBuildVector(VT, DL, Ops);
26785 }
26786
26787 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
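  // e.g. for an fadd of two v4f32 splats of X and Y this emits a single scalar
  // fadd and re-splats the result, rather than performing the fadd on whole
  // vectors.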
26788 return DAG.getSplat(VT, DL, ScalarBO);
26789}
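// Editor's note (illustrative sketch, not part of the original source): a
// concrete instance of the fold above, assuming v4i32 operands with a legal
// scalar add:
//   add (splat_vector X), (splat_vector Y)
//     --> splat_vector (add X, Y)
// The scalars are extracted once, combined with a single scalar add, and the
// result is re-splatted (or rebuilt as a build_vector for fixed vectors).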
26790
26791/// Visit a vector cast operation, like FP_EXTEND.
26792SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26793 EVT VT = N->getValueType(0);
26794 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26795 EVT EltVT = VT.getVectorElementType();
26796 unsigned Opcode = N->getOpcode();
26797
26798 SDValue N0 = N->getOperand(0);
26799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26800
26801 // TODO: promote operation might be also good here?
26802 int Index0;
26803 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26804 if (Src0 &&
26805 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26806 TLI.isExtractVecEltCheap(VT, Index0)) &&
26807 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26808 TLI.preferScalarizeSplat(N)) {
26809 EVT SrcVT = N0.getValueType();
26810 EVT SrcEltVT = SrcVT.getVectorElementType();
26811 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26812 SDValue Elt =
26813 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26814 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26815 if (VT.isScalableVector())
26816 return DAG.getSplatVector(VT, DL, ScalarBO);
26817    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26818    return DAG.getBuildVector(VT, DL, Ops);
26819 }
26820
26821 return SDValue();
26822}
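// Editor's note (illustrative sketch, not part of the original source): for a
// splat input with a legal scalar cast, the transform above turns, e.g.,
//   fp_extend (splat_vector X:f32) : nxv4f64
//     --> splat_vector (fp_extend X : f64)
// so the cast runs once on the splatted scalar instead of once per lane; for
// fixed-width vectors the scalar result is rebroadcast with a build_vector.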
26823
26824/// Visit a binary vector operation, like ADD.
26825SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26826 EVT VT = N->getValueType(0);
26827 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26828
26829 SDValue LHS = N->getOperand(0);
26830 SDValue RHS = N->getOperand(1);
26831 unsigned Opcode = N->getOpcode();
26832 SDNodeFlags Flags = N->getFlags();
26833
26834 // Move unary shuffles with identical masks after a vector binop:
26835 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
26836 // --> shuffle (VBinOp A, B), Undef, Mask
26837 // This does not require type legality checks because we are creating the
26838 // same types of operations that are in the original sequence. We do have to
26839  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
26840 // though. This code is adapted from the identical transform in instcombine.
26841 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26842 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26843 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26844 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26845 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26846 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26847 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26848 RHS.getOperand(0), Flags);
26849 SDValue UndefV = LHS.getOperand(1);
26850 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26851 }
26852
26853 // Try to sink a splat shuffle after a binop with a uniform constant.
26854 // This is limited to cases where neither the shuffle nor the constant have
26855 // undefined elements because that could be poison-unsafe or inhibit
26856 // demanded elements analysis. It is further limited to not change a splat
26857 // of an inserted scalar because that may be optimized better by
26858 // load-folding or other target-specific behaviors.
26859 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26860 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26861 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26862 // binop (splat X), (splat C) --> splat (binop X, C)
26863 SDValue X = Shuf0->getOperand(0);
26864 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26865 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26866 Shuf0->getMask());
26867 }
26868 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26869 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26870 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26871 // binop (splat C), (splat X) --> splat (binop C, X)
26872 SDValue X = Shuf1->getOperand(0);
26873 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26874 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26875 Shuf1->getMask());
26876 }
26877 }
26878
26879 // The following pattern is likely to emerge with vector reduction ops. Moving
26880 // the binary operation ahead of insertion may allow using a narrower vector
26881 // instruction that has better performance than the wide version of the op:
26882 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
26883 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26884 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26885 LHS.getOperand(2) == RHS.getOperand(2) &&
26886 (LHS.hasOneUse() || RHS.hasOneUse())) {
26887 SDValue X = LHS.getOperand(1);
26888 SDValue Y = RHS.getOperand(1);
26889 SDValue Z = LHS.getOperand(2);
26890 EVT NarrowVT = X.getValueType();
26891 if (NarrowVT == Y.getValueType() &&
26892 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26893 LegalOperations)) {
26894 // (binop undef, undef) may not return undef, so compute that result.
26895 SDValue VecC =
26896 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26897 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26898 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26899 }
26900 }
26901
26902 // Make sure all but the first op are undef or constant.
26903 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26904 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26905 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26906 return Op.isUndef() ||
26907 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26908 });
26909 };
26910
26911 // The following pattern is likely to emerge with vector reduction ops. Moving
26912 // the binary operation ahead of the concat may allow using a narrower vector
26913 // instruction that has better performance than the wide version of the op:
26914 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26915 // concat (VBinOp X, Y), VecC
26916 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26917 (LHS.hasOneUse() || RHS.hasOneUse())) {
26918 EVT NarrowVT = LHS.getOperand(0).getValueType();
26919 if (NarrowVT == RHS.getOperand(0).getValueType() &&
26920 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26921 unsigned NumOperands = LHS.getNumOperands();
26922 SmallVector<SDValue, 4> ConcatOps;
26923 for (unsigned i = 0; i != NumOperands; ++i) {
26924        // This constant-folds for operands 1 and up.
26925 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26926 RHS.getOperand(i)));
26927 }
26928
26929 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26930 }
26931 }
26932
26933 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26934 return V;
26935
26936 return SDValue();
26937}
26938
26939SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26940 SDValue N2) {
26941 assert(N0.getOpcode() == ISD::SETCC &&
26942 "First argument must be a SetCC node!");
26943
26944 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26945 cast<CondCodeSDNode>(N0.getOperand(2))->get());
26946
26947 // If we got a simplified select_cc node back from SimplifySelectCC, then
26948 // break it down into a new SETCC node, and a new SELECT node, and then return
26949 // the SELECT node, since we were called with a SELECT node.
26950 if (SCC.getNode()) {
26951 // Check to see if we got a select_cc back (to turn into setcc/select).
26952 // Otherwise, just return whatever node we got back, like fabs.
26953 if (SCC.getOpcode() == ISD::SELECT_CC) {
26954 const SDNodeFlags Flags = N0->getFlags();
26955      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26956                                  N0.getValueType(),
26957 SCC.getOperand(0), SCC.getOperand(1),
26958 SCC.getOperand(4), Flags);
26959 AddToWorklist(SETCC.getNode());
26960 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26961 SCC.getOperand(2), SCC.getOperand(3));
26962 SelectNode->setFlags(Flags);
26963 return SelectNode;
26964 }
26965
26966 return SCC;
26967 }
26968 return SDValue();
26969}
26970
26971/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26972/// being selected between, see if we can simplify the select. Callers of this
26973/// should assume that TheSelect is deleted if this returns true. As such, they
26974/// should return the appropriate thing (e.g. the node) back to the top-level of
26975/// the DAG combiner loop to avoid it being looked at.
26976bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26977 SDValue RHS) {
26978 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26979 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26980 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26981 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26982 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26983 SDValue Sqrt = RHS;
26984      ISD::CondCode CC;
26985      SDValue CmpLHS;
26986 const ConstantFPSDNode *Zero = nullptr;
26987
26988 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26989 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26990 CmpLHS = TheSelect->getOperand(0);
26991 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26992 } else {
26993 // SELECT or VSELECT
26994 SDValue Cmp = TheSelect->getOperand(0);
26995 if (Cmp.getOpcode() == ISD::SETCC) {
26996 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26997 CmpLHS = Cmp.getOperand(0);
26998 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
26999 }
27000 }
27001 if (Zero && Zero->isZero() &&
27002 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27003 CC == ISD::SETULT || CC == ISD::SETLT)) {
27004 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27005 CombineTo(TheSelect, Sqrt);
27006 return true;
27007 }
27008 }
27009 }
27010 // Cannot simplify select with vector condition
27011 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27012
27013 // If this is a select from two identical things, try to pull the operation
27014 // through the select.
27015 if (LHS.getOpcode() != RHS.getOpcode() ||
27016 !LHS.hasOneUse() || !RHS.hasOneUse())
27017 return false;
27018
27019 // If this is a load and the token chain is identical, replace the select
27020 // of two loads with a load through a select of the address to load from.
27021 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27022 // constants have been dropped into the constant pool.
27023 if (LHS.getOpcode() == ISD::LOAD) {
27024 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27025 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27026
27027 // Token chains must be identical.
27028 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27029 // Do not let this transformation reduce the number of volatile loads.
27030 // Be conservative for atomics for the moment
27031 // TODO: This does appear to be legal for unordered atomics (see D66309)
27032 !LLD->isSimple() || !RLD->isSimple() ||
27033 // FIXME: If either is a pre/post inc/dec load,
27034 // we'd need to split out the address adjustment.
27035 LLD->isIndexed() || RLD->isIndexed() ||
27036 // If this is an EXTLOAD, the VT's must match.
27037 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27038 // If this is an EXTLOAD, the kind of extension must match.
27039 (LLD->getExtensionType() != RLD->getExtensionType() &&
27040 // The only exception is if one of the extensions is anyext.
27041 LLD->getExtensionType() != ISD::EXTLOAD &&
27042 RLD->getExtensionType() != ISD::EXTLOAD) ||
27043 // FIXME: this discards src value information. This is
27044 // over-conservative. It would be beneficial to be able to remember
27045 // both potential memory locations. Since we are discarding
27046 // src value info, don't do the transformation if the memory
27047 // locations are not in the default address space.
27048 LLD->getPointerInfo().getAddrSpace() != 0 ||
27049 RLD->getPointerInfo().getAddrSpace() != 0 ||
27050 // We can't produce a CMOV of a TargetFrameIndex since we won't
27051 // generate the address generation required.
27052        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27053        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27054        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27055 LLD->getBasePtr().getValueType()))
27056 return false;
27057
27058 // The loads must not depend on one another.
27059 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27060 return false;
27061
27062 // Check that the select condition doesn't reach either load. If so,
27063 // folding this will induce a cycle into the DAG. If not, this is safe to
27064 // xform, so create a select of the addresses.
27065
27066    SmallPtrSet<const SDNode *, 32> Visited;
27067    SmallVector<const SDNode *, 16> Worklist;
27068
27069 // Always fail if LLD and RLD are not independent. TheSelect is a
27070 // predecessor to all Nodes in question so we need not search past it.
27071
27072 Visited.insert(TheSelect);
27073 Worklist.push_back(LLD);
27074 Worklist.push_back(RLD);
27075
27076 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27077 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27078 return false;
27079
27080 SDValue Addr;
27081 if (TheSelect->getOpcode() == ISD::SELECT) {
27082 // We cannot do this optimization if any pair of {RLD, LLD} is a
27083 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27084 // Loads, we only need to check if CondNode is a successor to one of the
27085 // loads. We can further avoid this if there's no use of their chain
27086 // value.
27087 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27088 Worklist.push_back(CondNode);
27089
27090 if ((LLD->hasAnyUseOfValue(1) &&
27091 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27092 (RLD->hasAnyUseOfValue(1) &&
27093 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27094 return false;
27095
27096 Addr = DAG.getSelect(SDLoc(TheSelect),
27097 LLD->getBasePtr().getValueType(),
27098 TheSelect->getOperand(0), LLD->getBasePtr(),
27099 RLD->getBasePtr());
27100 } else { // Otherwise SELECT_CC
27101 // We cannot do this optimization if any pair of {RLD, LLD} is a
27102 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27103 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27104 // one of the loads. We can further avoid this if there's no use of their
27105 // chain value.
27106
27107 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27108 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27109 Worklist.push_back(CondLHS);
27110 Worklist.push_back(CondRHS);
27111
27112 if ((LLD->hasAnyUseOfValue(1) &&
27113 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27114 (RLD->hasAnyUseOfValue(1) &&
27115 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27116 return false;
27117
27118 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27119 LLD->getBasePtr().getValueType(),
27120 TheSelect->getOperand(0),
27121 TheSelect->getOperand(1),
27122 LLD->getBasePtr(), RLD->getBasePtr(),
27123 TheSelect->getOperand(4));
27124 }
27125
27126 SDValue Load;
27127 // It is safe to replace the two loads if they have different alignments,
27128 // but the new load must be the minimum (most restrictive) alignment of the
27129 // inputs.
27130 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27131 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27132 if (!RLD->isInvariant())
27133 MMOFlags &= ~MachineMemOperand::MOInvariant;
27134 if (!RLD->isDereferenceable())
27135 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27136 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27137 // FIXME: Discards pointer and AA info.
27138 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27139 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27140 MMOFlags);
27141 } else {
27142 // FIXME: Discards pointer and AA info.
27143 Load = DAG.getExtLoad(
27144          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27145                                                   : LLD->getExtensionType(),
27146 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27147 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27148 }
27149
27150 // Users of the select now use the result of the load.
27151 CombineTo(TheSelect, Load);
27152
27153 // Users of the old loads now use the new load's chain. We know the
27154 // old-load value is dead now.
27155 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27156 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27157 return true;
27158 }
27159
27160 return false;
27161}
27162
27163/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27164/// bitwise 'and'.
27165SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27166 SDValue N1, SDValue N2, SDValue N3,
27167 ISD::CondCode CC) {
27168 // If this is a select where the false operand is zero and the compare is a
27169 // check of the sign bit, see if we can perform the "gzip trick":
27170 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27171 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
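  // Editor's note (illustrative sketch, not part of the original source): with
  // i32 operands the first form becomes
  //   select_cc setlt X, 0, A, 0 --> and (sra X, 31), A
  // because (sra X, 31) is all-ones exactly when X is negative and zero
  // otherwise, so the 'and' yields A or 0 without a branch.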
27172 EVT XType = N0.getValueType();
27173 EVT AType = N2.getValueType();
27174 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27175 return SDValue();
27176
27177 // If the comparison is testing for a positive value, we have to invert
27178 // the sign bit mask, so only do that transform if the target has a bitwise
27179 // 'and not' instruction (the invert is free).
27180 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27181 // (X > -1) ? A : 0
27182 // (X > 0) ? X : 0 <-- This is canonical signed max.
27183 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27184 return SDValue();
27185 } else if (CC == ISD::SETLT) {
27186 // (X < 0) ? A : 0
27187 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27188 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27189 return SDValue();
27190 } else {
27191 return SDValue();
27192 }
27193
27194 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27195 // constant.
27196 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27197 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27198 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27199 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27200 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27201 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27202 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27203 AddToWorklist(Shift.getNode());
27204
27205 if (XType.bitsGT(AType)) {
27206 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27207 AddToWorklist(Shift.getNode());
27208 }
27209
27210 if (CC == ISD::SETGT)
27211 Shift = DAG.getNOT(DL, Shift, AType);
27212
27213 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27214 }
27215 }
27216
27217 unsigned ShCt = XType.getSizeInBits() - 1;
27218 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27219 return SDValue();
27220
27221 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27222 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27223 AddToWorklist(Shift.getNode());
27224
27225 if (XType.bitsGT(AType)) {
27226 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27227 AddToWorklist(Shift.getNode());
27228 }
27229
27230 if (CC == ISD::SETGT)
27231 Shift = DAG.getNOT(DL, Shift, AType);
27232
27233 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27234}
27235
27236// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27237SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27238 SDValue N0 = N->getOperand(0);
27239 SDValue N1 = N->getOperand(1);
27240 SDValue N2 = N->getOperand(2);
27241 SDLoc DL(N);
27242
27243 unsigned BinOpc = N1.getOpcode();
27244 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27245 (N1.getResNo() != N2.getResNo()))
27246 return SDValue();
27247
27248 // The use checks are intentionally on SDNode because we may be dealing
27249 // with opcodes that produce more than one SDValue.
27250 // TODO: Do we really need to check N0 (the condition operand of the select)?
27251 // But removing that clause could cause an infinite loop...
27252 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27253 return SDValue();
27254
27255 // Binops may include opcodes that return multiple values, so all values
27256 // must be created/propagated from the newly created binops below.
27257 SDVTList OpVTs = N1->getVTList();
27258
27259 // Fold select(cond, binop(x, y), binop(z, y))
27260 // --> binop(select(cond, x, z), y)
27261 if (N1.getOperand(1) == N2.getOperand(1)) {
27262 SDValue N10 = N1.getOperand(0);
27263 SDValue N20 = N2.getOperand(0);
27264 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27265 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27266 NewBinOp->setFlags(N1->getFlags());
27267 NewBinOp->intersectFlagsWith(N2->getFlags());
27268 return SDValue(NewBinOp.getNode(), N1.getResNo());
27269 }
27270
27271 // Fold select(cond, binop(x, y), binop(x, z))
27272 // --> binop(x, select(cond, y, z))
27273 if (N1.getOperand(0) == N2.getOperand(0)) {
27274 SDValue N11 = N1.getOperand(1);
27275 SDValue N21 = N2.getOperand(1);
27276 // Second op VT might be different (e.g. shift amount type)
27277 if (N11.getValueType() == N21.getValueType()) {
27278 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27279 SDValue NewBinOp =
27280 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27281 NewBinOp->setFlags(N1->getFlags());
27282 NewBinOp->intersectFlagsWith(N2->getFlags());
27283 return SDValue(NewBinOp.getNode(), N1.getResNo());
27284 }
27285 }
27286
27287 // TODO: Handle isCommutativeBinOp patterns as well?
27288 return SDValue();
27289}
27290
27291// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27292SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27293 SDValue N0 = N->getOperand(0);
27294 EVT VT = N->getValueType(0);
27295 bool IsFabs = N->getOpcode() == ISD::FABS;
27296 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27297
27298 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27299 return SDValue();
27300
27301 SDValue Int = N0.getOperand(0);
27302 EVT IntVT = Int.getValueType();
27303
27304 // The operand to cast should be integer.
27305 if (!IntVT.isInteger() || IntVT.isVector())
27306 return SDValue();
27307
27308 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27309 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27310 APInt SignMask;
27311 if (N0.getValueType().isVector()) {
27312 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27313 // 0x7f...) per element and splat it.
27314    SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27315    if (IsFabs)
27316 SignMask = ~SignMask;
27317 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27318 } else {
27319 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27320 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27321 if (IsFabs)
27322 SignMask = ~SignMask;
27323 }
27324 SDLoc DL(N0);
27325 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27326 DAG.getConstant(SignMask, DL, IntVT));
27327 AddToWorklist(Int.getNode());
27328 return DAG.getBitcast(VT, Int);
27329}
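// Editor's note (illustrative sketch, not part of the original source): the
// scalar f32 case of this transform corresponds to the following standalone
// C++ sketch, shown only to illustrate the sign-bit manipulation; it is not
// part of DAGCombiner:
//   #include <bit>
//   #include <cstdint>
//   float fnegViaInt(float X) {  // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
//     return std::bit_cast<float>(std::bit_cast<uint32_t>(X) ^ 0x80000000u);
//   }
//   float fabsViaInt(float X) {  // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
//     return std::bit_cast<float>(std::bit_cast<uint32_t>(X) & 0x7FFFFFFFu);
//   }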
27330
27331/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
27332/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27333/// in it. This may be a win when the constant is not otherwise available
27334/// because it replaces two constant pool loads with one.
27335SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27336 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27337 ISD::CondCode CC) {
27338  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27339    return SDValue();
27340
27341 // If we are before legalize types, we want the other legalization to happen
27342 // first (for example, to avoid messing with soft float).
27343 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27344 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27345 EVT VT = N2.getValueType();
27346 if (!TV || !FV || !TLI.isTypeLegal(VT))
27347 return SDValue();
27348
27349 // If a constant can be materialized without loads, this does not make sense.
27350  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27351      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27352 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27353 return SDValue();
27354
27355 // If both constants have multiple uses, then we won't need to do an extra
27356 // load. The values are likely around in registers for other users.
27357 if (!TV->hasOneUse() && !FV->hasOneUse())
27358 return SDValue();
27359
27360 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27361 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27362 Type *FPTy = Elts[0]->getType();
27363 const DataLayout &TD = DAG.getDataLayout();
27364
27365 // Create a ConstantArray of the two constants.
27366 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27367 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27368 TD.getPrefTypeAlign(FPTy));
27369 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27370
27371 // Get offsets to the 0 and 1 elements of the array, so we can select between
27372 // them.
27373 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27374 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27375 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27376 SDValue Cond =
27377 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27378 AddToWorklist(Cond.getNode());
27379 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27380 AddToWorklist(CstOffset.getNode());
27381 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27382 AddToWorklist(CPIdx.getNode());
27383 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27384                     MachinePointerInfo::getConstantPool(
27385                         DAG.getMachineFunction()), Alignment);
27386}
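// Editor's note (illustrative sketch, not part of the original source): for
// f32 constants the pool entry built above is laid out as { FV, TV }, so the
// emitted load reads from CPIdx + ((N0 cond N1) ? 4 : 0); one load through a
// selected offset replaces the two constant pool loads the select would
// otherwise require.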
27387
27388/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27389/// where 'cond' is the comparison specified by CC.
27390SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27391                                      SDValue N2, SDValue N3, ISD::CondCode CC,
27392                                      bool NotExtCompare) {
27393 // (x ? y : y) -> y.
27394 if (N2 == N3) return N2;
27395
27396 EVT CmpOpVT = N0.getValueType();
27397 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27398 EVT VT = N2.getValueType();
27399 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27400 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27401 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27402
27403 // Determine if the condition we're dealing with is constant.
27404 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27405 AddToWorklist(SCC.getNode());
27406 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27407 // fold select_cc true, x, y -> x
27408 // fold select_cc false, x, y -> y
27409 return !(SCCC->isZero()) ? N2 : N3;
27410 }
27411 }
27412
27413 if (SDValue V =
27414 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27415 return V;
27416
27417 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27418 return V;
27419
27420 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27421  // where y has a single bit set.
27422 // A plaintext description would be, we can turn the SELECT_CC into an AND
27423 // when the condition can be materialized as an all-ones register. Any
27424 // single bit-test can be materialized as an all-ones register with
27425 // shift-left and shift-right-arith.
27426 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27427 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27428 SDValue AndLHS = N0->getOperand(0);
27429 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27430 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27431 // Shift the tested bit over the sign bit.
27432 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27433 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27434 unsigned ShCt = AndMask.getBitWidth() - 1;
27435 SDValue ShlAmt =
27436 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27437 getShiftAmountTy(AndLHS.getValueType()));
27438 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27439
27440 // Now arithmetic right shift it all the way over, so the result is
27441 // either all-ones, or zero.
27442 SDValue ShrAmt =
27443 DAG.getConstant(ShCt, SDLoc(Shl),
27444                          getShiftAmountTy(Shl.getValueType()));
27445        SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27446
27447 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27448 }
27449 }
27450 }
27451
27452 // fold select C, 16, 0 -> shl C, 4
27453 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27454 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27455
27456 if ((Fold || Swap) &&
27457 TLI.getBooleanContents(CmpOpVT) ==
27458          TargetLowering::ZeroOrOneBooleanContent &&
27459      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27460
27461 if (Swap) {
27462 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27463 std::swap(N2C, N3C);
27464 }
27465
27466 // If the caller doesn't want us to simplify this into a zext of a compare,
27467 // don't do it.
27468 if (NotExtCompare && N2C->isOne())
27469 return SDValue();
27470
27471 SDValue Temp, SCC;
27472 // zext (setcc n0, n1)
27473 if (LegalTypes) {
27474 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27475 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27476 } else {
27477 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27478 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27479 }
27480
27481 AddToWorklist(SCC.getNode());
27482 AddToWorklist(Temp.getNode());
27483
27484 if (N2C->isOne())
27485 return Temp;
27486
27487 unsigned ShCt = N2C->getAPIntValue().logBase2();
27488 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27489 return SDValue();
27490
27491 // shl setcc result by log2 n2c
27492 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27493 DAG.getConstant(ShCt, SDLoc(Temp),
27494                                       getShiftAmountTy(Temp.getValueType())));
27495  }
27496
27497 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27498 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27499 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27500 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27501 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27502 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27503 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27504 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27505 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27506 SDValue ValueOnZero = N2;
27507 SDValue Count = N3;
27508 // If the condition is NE instead of E, swap the operands.
27509 if (CC == ISD::SETNE)
27510 std::swap(ValueOnZero, Count);
27511 // Check if the value on zero is a constant equal to the bits in the type.
27512 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27513 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27514 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27515 // legal, combine to just cttz.
27516 if ((Count.getOpcode() == ISD::CTTZ ||
27517 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27518 N0 == Count.getOperand(0) &&
27519 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27520 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27521 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27522 // legal, combine to just ctlz.
27523 if ((Count.getOpcode() == ISD::CTLZ ||
27524 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27525 N0 == Count.getOperand(0) &&
27526 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27527 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27528 }
27529 }
27530 }
27531
27532 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27533 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27534 if (!NotExtCompare && N1C && N2C && N3C &&
27535 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27536 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27537 (N1C->isZero() && CC == ISD::SETLT)) &&
27538 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27539 SDValue ASR = DAG.getNode(
27540 ISD::SRA, DL, CmpOpVT, N0,
27541 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27542 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27543 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27544 }
27545
27546 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27547 return S;
27548 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27549 return S;
27550
27551 return SDValue();
27552}
27553
27554/// This is a stub for TargetLowering::SimplifySetCC.
27555SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27556 ISD::CondCode Cond, const SDLoc &DL,
27557 bool foldBooleans) {
27558  TargetLowering::DAGCombinerInfo
27559    DagCombineInfo(DAG, Level, false, this);
27560 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27561}
27562
27563/// Given an ISD::SDIV node expressing a divide by constant, return
27564/// a DAG expression to select that will generate the same value by multiplying
27565/// by a magic number.
27566/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27567SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27568 // when optimising for minimum size, we don't want to expand a div to a mul
27569 // and a shift.
27570  if (DAG.getMachineFunction().getFunction().hasMinSize())
27571    return SDValue();
27572
27573  SmallVector<SDNode *, 8> Built;
27574  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27575 for (SDNode *N : Built)
27576 AddToWorklist(N);
27577 return S;
27578 }
27579
27580 return SDValue();
27581}
27582
27583/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27584/// DAG expression that will generate the same value by right shifting.
27585SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27586 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27587 if (!C)
27588 return SDValue();
27589
27590 // Avoid division by zero.
27591 if (C->isZero())
27592 return SDValue();
27593
27594  SmallVector<SDNode *, 8> Built;
27595  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27596 for (SDNode *N : Built)
27597 AddToWorklist(N);
27598 return S;
27599 }
27600
27601 return SDValue();
27602}
27603
27604/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27605/// expression that will generate the same value by multiplying by a magic
27606/// number.
27607/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27608SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27609 // when optimising for minimum size, we don't want to expand a div to a mul
27610 // and a shift.
27611  if (DAG.getMachineFunction().getFunction().hasMinSize())
27612    return SDValue();
27613
27614  SmallVector<SDNode *, 8> Built;
27615  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27616 for (SDNode *N : Built)
27617 AddToWorklist(N);
27618 return S;
27619 }
27620
27621 return SDValue();
27622}
27623
27624/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27625/// return a DAG expression that will generate the same value.
27626SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27627 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27628 if (!C)
27629 return SDValue();
27630
27631 // Avoid division by zero.
27632 if (C->isZero())
27633 return SDValue();
27634
27635  SmallVector<SDNode *, 8> Built;
27636  if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27637 for (SDNode *N : Built)
27638 AddToWorklist(N);
27639 return S;
27640 }
27641
27642 return SDValue();
27643}
27644
27645// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27646//
27647// Returns the node that represents `Log2(Op)`. This may create a new node. If
27648// we are unable to compute `Log2(Op)` it returns `SDValue()`.
27649//
27650// All nodes will be created at `DL` and the output will be of type `VT`.
27651//
27652// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27653// `AssumeNonZero` if this function should simply assume (not require proving
27654// `Op` is non-zero).
27655static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27656                                   SDValue Op, unsigned Depth,
27657 bool AssumeNonZero) {
27658 assert(VT.isInteger() && "Only integer types are supported!");
27659
27660 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27661 while (true) {
27662 switch (V.getOpcode()) {
27663 case ISD::TRUNCATE:
27664 case ISD::ZERO_EXTEND:
27665 V = V.getOperand(0);
27666 break;
27667 default:
27668 return V;
27669 }
27670 }
27671 };
27672
27673 if (VT.isScalableVector())
27674 return SDValue();
27675
27676 Op = PeekThroughCastsAndTrunc(Op);
27677
27678 // Helper for determining whether a value is a power-2 constant scalar or a
27679 // vector of such elements.
27680 SmallVector<APInt> Pow2Constants;
27681 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27682 if (C->isZero() || C->isOpaque())
27683 return false;
27684 // TODO: We may also be able to support negative powers of 2 here.
27685 if (C->getAPIntValue().isPowerOf2()) {
27686 Pow2Constants.emplace_back(C->getAPIntValue());
27687 return true;
27688 }
27689 return false;
27690 };
27691
27692 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27693 if (!VT.isVector())
27694 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27695 // We need to create a build vector
27696 SmallVector<SDValue> Log2Ops;
27697 for (const APInt &Pow2 : Pow2Constants)
27698 Log2Ops.emplace_back(
27699 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27700 return DAG.getBuildVector(VT, DL, Log2Ops);
27701 }
27702
27703 if (Depth >= DAG.MaxRecursionDepth)
27704 return SDValue();
27705
27706 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27707 ToCast = PeekThroughCastsAndTrunc(ToCast);
27708 EVT CurVT = ToCast.getValueType();
27709 if (NewVT == CurVT)
27710 return ToCast;
27711
27712 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27713 return DAG.getBitcast(NewVT, ToCast);
27714
27715 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27716 };
27717
27718 // log2(X << Y) -> log2(X) + Y
27719 if (Op.getOpcode() == ISD::SHL) {
27720 // 1 << Y and X nuw/nsw << Y are all non-zero.
27721 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27722 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27723 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27724 Depth + 1, AssumeNonZero))
27725 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27726 CastToVT(VT, Op.getOperand(1)));
27727 }
27728
27729 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27730 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27731 Op.hasOneUse()) {
27732 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27733 Depth + 1, AssumeNonZero))
27734 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27735 Depth + 1, AssumeNonZero))
27736 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27737 }
27738
27739 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27740 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27741 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27742 Op.hasOneUse()) {
27743 // Use AssumeNonZero as false here. Otherwise we can hit case where
27744 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because overflow).
27745 if (SDValue LogX =
27746 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27747 /*AssumeNonZero*/ false))
27748 if (SDValue LogY =
27749 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27750 /*AssumeNonZero*/ false))
27751 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27752 }
27753
27754 return SDValue();
27755}
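// Editor's note (illustrative sketch, not part of the original source): two
// instances of the recursion above, assuming i32 operands:
//   log2(8)      --> 3                 (constant power-of-two case)
//   log2(1 << Y) --> log2(1) + Y = Y   (the ISD::SHL case; 1 << Y is known
//                                       non-zero because its LHS is one)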
27756
27757/// Determines the LogBase2 value for a non-null input value using the
27758/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
27759SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27760 bool KnownNonZero, bool InexpensiveOnly,
27761 std::optional<EVT> OutVT) {
27762 EVT VT = OutVT ? *OutVT : V.getValueType();
27763 SDValue InexpensiveLogBase2 =
27764 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27765 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27766 return InexpensiveLogBase2;
27767
27768 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27769 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27770 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27771 return LogBase2;
27772}
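// Editor's note (illustrative sketch, not part of the original source): for an
// i32 value known to be the power of two 8, ctlz(8) == 28, so
// LogBase2 = (32 - 1) - 28 = 3.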
27773
27774/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27775/// For the reciprocal, we need to find the zero of the function:
27776/// F(X) = 1/X - A [which has a zero at X = 1/A]
27777/// =>
27778/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27779/// does not require additional intermediate precision]
27780/// For the last iteration, put numerator N into it to gain more precision:
27781/// Result = N X_i + X_i (N - N A X_i)
27782SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27783 SDNodeFlags Flags) {
27784 if (LegalDAG)
27785 return SDValue();
27786
27787 // TODO: Handle extended types?
27788 EVT VT = Op.getValueType();
27789 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27790 VT.getScalarType() != MVT::f64)
27791 return SDValue();
27792
27793 // If estimates are explicitly disabled for this function, we're done.
27794  MachineFunction &MF = DAG.getMachineFunction();
27795  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27796 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27797 return SDValue();
27798
27799 // Estimates may be explicitly enabled for this type with a custom number of
27800 // refinement steps.
27801 int Iterations = TLI.getDivRefinementSteps(VT, MF);
27802 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27803 AddToWorklist(Est.getNode());
27804
27805 SDLoc DL(Op);
27806 if (Iterations) {
27807 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27808
27809 // Newton iterations: Est = Est + Est (N - Arg * Est)
27810 // If this is the last iteration, also multiply by the numerator.
27811 for (int i = 0; i < Iterations; ++i) {
27812 SDValue MulEst = Est;
27813
27814 if (i == Iterations - 1) {
27815 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27816 AddToWorklist(MulEst.getNode());
27817 }
27818
27819 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27820 AddToWorklist(NewEst.getNode());
27821
27822 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27823 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27824 AddToWorklist(NewEst.getNode());
27825
27826 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27827 AddToWorklist(NewEst.getNode());
27828
27829 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27830 AddToWorklist(Est.getNode());
27831 }
27832 } else {
27833 // If no iterations are available, multiply with N.
27834 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27835 AddToWorklist(Est.getNode());
27836 }
27837
27838 return Est;
27839 }
27840
27841 return SDValue();
27842}
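// Editor's note (illustrative sketch, not part of the original source): the
// refinement loop above is the standard Newton-Raphson reciprocal update. A
// scalar sketch of the same recurrence, omitting the final fold of the
// numerator N that the code performs on the last iteration, would be:
//   double recipNewton(double A, double Est, int Iterations) {
//     // X_{i+1} = X_i + X_i * (1 - A * X_i); converges quadratically to 1/A.
//     for (int i = 0; i < Iterations; ++i)
//       Est = Est + Est * (1.0 - A * Est);
//     return Est;
//   }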
27843
27844/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27845/// For the reciprocal sqrt, we need to find the zero of the function:
27846/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27847/// =>
27848/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27849/// As a result, we precompute A/2 prior to the iteration loop.
27850SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27851 unsigned Iterations,
27852 SDNodeFlags Flags, bool Reciprocal) {
27853 EVT VT = Arg.getValueType();
27854 SDLoc DL(Arg);
27855 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27856
27857 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27858 // this entire sequence requires only one FP constant.
27859 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27860 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27861
27862 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27863 for (unsigned i = 0; i < Iterations; ++i) {
27864 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27865 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27866 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27867 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27868 }
27869
27870 // If non-reciprocal square root is requested, multiply the result by Arg.
27871 if (!Reciprocal)
27872 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27873
27874 return Est;
27875}
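// Editor's note (illustrative sketch, not part of the original source): a
// scalar sketch of the single-constant recurrence used above:
//   double rsqrtNewton(double A, double Est, unsigned Iterations) {
//     double HalfA = 1.5 * A - A;  // 0.5 * A, formed as in HalfArg above
//     // X_{i+1} = X_i * (1.5 - 0.5 * A * X_i * X_i); converges to 1/sqrt(A).
//     for (unsigned i = 0; i < Iterations; ++i)
//       Est = Est * (1.5 - HalfA * Est * Est);
//     return Est;
//   }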
27876
27877/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27878/// For the reciprocal sqrt, we need to find the zero of the function:
27879/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27880/// =>
27881/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
27882SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27883 unsigned Iterations,
27884 SDNodeFlags Flags, bool Reciprocal) {
27885 EVT VT = Arg.getValueType();
27886 SDLoc DL(Arg);
27887 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27888 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27889
27890 // This routine must enter the loop below to work correctly
27891 // when (Reciprocal == false).
27892 assert(Iterations > 0);
27893
27894 // Newton iterations for reciprocal square root:
27895 // E = (E * -0.5) * ((A * E) * E + -3.0)
27896 for (unsigned i = 0; i < Iterations; ++i) {
27897 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27898 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27899 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27900
27901 // When calculating a square root at the last iteration build:
27902 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
27903 // (notice a common subexpression)
27904 SDValue LHS;
27905 if (Reciprocal || (i + 1) < Iterations) {
27906 // RSQRT: LHS = (E * -0.5)
27907 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27908 } else {
27909 // SQRT: LHS = (A * E) * -0.5
27910 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27911 }
27912
27913 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27914 }
27915
27916 return Est;
27917}
27918
27919/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27920/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27921/// Op can be zero.
27922SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27923 bool Reciprocal) {
27924 if (LegalDAG)
27925 return SDValue();
27926
27927 // TODO: Handle extended types?
27928 EVT VT = Op.getValueType();
27929 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27930 VT.getScalarType() != MVT::f64)
27931 return SDValue();
27932
27933 // If estimates are explicitly disabled for this function, we're done.
27934  MachineFunction &MF = DAG.getMachineFunction();
27935  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27936 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27937 return SDValue();
27938
27939 // Estimates may be explicitly enabled for this type with a custom number of
27940 // refinement steps.
27941 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27942
27943 bool UseOneConstNR = false;
27944 if (SDValue Est =
27945 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27946 Reciprocal)) {
27947 AddToWorklist(Est.getNode());
27948
27949 if (Iterations > 0)
27950 Est = UseOneConstNR
27951 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27952 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27953 if (!Reciprocal) {
27954 SDLoc DL(Op);
27955 // Try the target specific test first.
27956 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27957
27958 // The estimate is now completely wrong if the input was exactly 0.0 or
27959 // possibly a denormal. Force the answer to 0.0 or value provided by
27960 // target for those cases.
27961 Est = DAG.getNode(
27962 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27963 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27964 }
27965 return Est;
27966 }
27967
27968 return SDValue();
27969}
27970
27971SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27972 return buildSqrtEstimateImpl(Op, Flags, true);
27973}
27974
27975SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27976 return buildSqrtEstimateImpl(Op, Flags, false);
27977}
27978
27979/// Return true if there is any possibility that the two addresses overlap.
27980bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27981
27982 struct MemUseCharacteristics {
27983 bool IsVolatile;
27984 bool IsAtomic;
27985    SDValue BasePtr;
27986    int64_t Offset;
27987 LocationSize NumBytes;
27988 MachineMemOperand *MMO;
27989 };
27990
27991 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27992 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
27993 int64_t Offset = 0;
27994 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
27995 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
27996 : (LSN->getAddressingMode() == ISD::PRE_DEC)
27997 ? -1 * C->getSExtValue()
27998 : 0;
27999 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28000 return {LSN->isVolatile(), LSN->isAtomic(),
28001 LSN->getBasePtr(), Offset /*base offset*/,
28002 LocationSize::precise(Size), LSN->getMemOperand()};
28003 }
28004 if (const auto *LN = cast<LifetimeSDNode>(N))
28005 return {false /*isVolatile*/,
28006 /*isAtomic*/ false,
28007 LN->getOperand(1),
28008 (LN->hasOffset()) ? LN->getOffset() : 0,
28009 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28010                                : LocationSize::beforeOrAfterPointer(),
28011              (MachineMemOperand *)nullptr};
28012 // Default.
28013 return {false /*isvolatile*/,
28014 /*isAtomic*/ false,
28015 SDValue(),
28016 (int64_t)0 /*offset*/,
28017            LocationSize::beforeOrAfterPointer() /*size*/,
28018            (MachineMemOperand *)nullptr};
28019 };
28020
28021 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28022 MUC1 = getCharacteristics(Op1);
28023
28024 // If they are to the same address, then they must be aliases.
28025 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28026 MUC0.Offset == MUC1.Offset)
28027 return true;
28028
28029 // If they are both volatile then they cannot be reordered.
28030 if (MUC0.IsVolatile && MUC1.IsVolatile)
28031 return true;
28032
28033 // Be conservative about atomics for the moment
28034 // TODO: This is way overconservative for unordered atomics (see D66309)
28035 if (MUC0.IsAtomic && MUC1.IsAtomic)
28036 return true;
28037
28038 if (MUC0.MMO && MUC1.MMO) {
28039 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28040 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28041 return false;
28042 }
28043
28044 // If NumBytes is scalable and offset is not 0, conservatively return may
28045 // alias
28046 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28047 MUC0.Offset != 0) ||
28048 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28049 MUC1.Offset != 0))
28050 return true;
28051 // Try to prove that there is aliasing, or that there is no aliasing. Either
28052 // way, we can return now. If nothing can be proved, proceed with more tests.
28053 bool IsAlias;
28054 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28055 DAG, IsAlias))
28056 return IsAlias;
28057
28058 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28059 // either are not known.
28060 if (!MUC0.MMO || !MUC1.MMO)
28061 return true;
28062
28063 // If one operation reads from invariant memory, and the other may store, they
28064 // cannot alias. These should really be checking the equivalent of mayWrite,
28065  // but it only matters for memory nodes other than load/store.
28066 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28067 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28068 return false;
28069
28070 // If we know required SrcValue1 and SrcValue2 have relatively large
28071 // alignment compared to the size and offset of the access, we may be able
28072 // to prove they do not alias. This check is conservative for now to catch
28073 // cases created by splitting vector types, it only works when the offsets are
28074 // multiples of the size of the data.
28075 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28076 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28077 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28078 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28079 LocationSize Size0 = MUC0.NumBytes;
28080 LocationSize Size1 = MUC1.NumBytes;
28081
28082 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28083 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28084 !Size1.isScalable() && Size0 == Size1 &&
28085 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28086 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28087 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28088 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28089 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28090
28091 // There is no overlap between these relatively aligned accesses of
28092 // similar size. Return no alias.
28093 if ((OffAlign0 + static_cast<int64_t>(
28094 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28095 (OffAlign1 + static_cast<int64_t>(
28096 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28097 return false;
28098 }
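  // Editor's note (illustrative sketch, not part of the original source): e.g.
  // two 4-byte accesses whose base object is 8-byte aligned and whose offsets
  // are 0 and 4 pass the checks above; OffAlign0 + 4 <= OffAlign1 then proves
  // the accesses cannot overlap, so no alias is reported.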
28099
28100 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28101                   ? CombinerGlobalAA
28102                   : DAG.getSubtarget().useAA();
28103#ifndef NDEBUG
28104 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28105      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28106    UseAA = false;
28107#endif
28108
28109 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28110 Size0.hasValue() && Size1.hasValue()) {
28111 // Use alias analysis information.
28112 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28113 int64_t Overlap0 =
28114 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28115 int64_t Overlap1 =
28116 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28117 LocationSize Loc0 =
28118 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28119 LocationSize Loc1 =
28120 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28121 if (AA->isNoAlias(
28122 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28123 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28124 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28125 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28126 return false;
28127 }
28128
28129 // Otherwise we have to assume they alias.
28130 return true;
28131}
28132
28133/// Walk up chain skipping non-aliasing memory nodes,
28134/// looking for aliasing nodes and adding them to the Aliases vector.
28135void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28136 SmallVectorImpl<SDValue> &Aliases) {
28137 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28138 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28139
28140 // Get alias information for node.
28141 // TODO: relax aliasing for unordered atomics (see D66309)
28142 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28143
28144 // Starting off.
28145 Chains.push_back(OriginalChain);
28146 unsigned Depth = 0;
28147
28148 // Attempt to improve chain by a single step
28149 auto ImproveChain = [&](SDValue &C) -> bool {
28150 switch (C.getOpcode()) {
28151 case ISD::EntryToken:
28152 // No need to mark EntryToken.
28153 C = SDValue();
28154 return true;
28155 case ISD::LOAD:
28156 case ISD::STORE: {
28157 // Get alias information for C.
28158 // TODO: Relax aliasing for unordered atomics (see D66309)
28159 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28160 cast<LSBaseSDNode>(C.getNode())->isSimple();
28161 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28162 // Look further up the chain.
28163 C = C.getOperand(0);
28164 return true;
28165 }
28166 // Alias, so stop here.
28167 return false;
28168 }
28169
28170 case ISD::CopyFromReg:
28171 // Always forward past CopyFromReg.
28172 C = C.getOperand(0);
28173 return true;
28174
28175    case ISD::LIFETIME_START:
28176    case ISD::LIFETIME_END: {
28177 // We can forward past any lifetime start/end that can be proven not to
28178 // alias the memory access.
28179 if (!mayAlias(N, C.getNode())) {
28180 // Look further up the chain.
28181 C = C.getOperand(0);
28182 return true;
28183 }
28184 return false;
28185 }
28186 default:
28187 return false;
28188 }
28189 };
28190
28191 // Look at each chain and determine if it is an alias. If so, add it to the
28192 // aliases list. If not, then continue up the chain looking for the next
28193 // candidate.
28194 while (!Chains.empty()) {
28195 SDValue Chain = Chains.pop_back_val();
28196
28197 // Don't bother if we've seen Chain before.
28198 if (!Visited.insert(Chain.getNode()).second)
28199 continue;
28200
28201 // For TokenFactor nodes, look at each operand and only continue up the
28202 // chain until we reach the depth limit.
28203 //
28204 // FIXME: The depth check could be made to return the last non-aliasing
28205 // chain we found before we hit a tokenfactor rather than the original
28206 // chain.
28207 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28208 Aliases.clear();
28209 Aliases.push_back(OriginalChain);
28210 return;
28211 }
28212
28213 if (Chain.getOpcode() == ISD::TokenFactor) {
28214 // We have to check each of the operands of the token factor for "small"
28215 // token factors, so we queue them up. Adding the operands to the queue
28216 // (stack) in reverse order maintains the original order and increases the
28217 // likelihood that getNode will find a matching token factor (CSE.)
28218 if (Chain.getNumOperands() > 16) {
28219 Aliases.push_back(Chain);
28220 continue;
28221 }
28222 for (unsigned n = Chain.getNumOperands(); n;)
28223 Chains.push_back(Chain.getOperand(--n));
28224 ++Depth;
28225 continue;
28226 }
28227 // Everything else
28228 if (ImproveChain(Chain)) {
28229 // Updated Chain Found, Consider new chain if one exists.
28230 if (Chain.getNode())
28231 Chains.push_back(Chain);
28232 ++Depth;
28233 continue;
28234 }
28235 // No Improved Chain Possible, treat as Alias.
28236 Aliases.push_back(Chain);
28237 }
28238}
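As the TokenFactor handling in GatherAllAliases notes, operands are pushed onto the Chains stack in reverse so they are visited in their original order. A tiny self-contained illustration of that invariant follows; the vectors and values are placeholders, not DAG nodes.

#include <cassert>
#include <vector>

// Illustrative only: pushing elements in reverse onto a stack makes them pop
// back out in their original order, as the loop over TokenFactor operands
// above relies on.
int main() {
  std::vector<int> Ops = {0, 1, 2, 3};   // stand-ins for chain operands
  std::vector<int> Stack;
  for (unsigned n = Ops.size(); n;)
    Stack.push_back(Ops[--n]);           // push 3, 2, 1, 0
  for (int Expected = 0; !Stack.empty(); ++Expected) {
    assert(Stack.back() == Expected);    // popped 0, 1, 2, 3
    Stack.pop_back();
  }
  return 0;
}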
28239
28240/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28241/// (aliasing node.)
28242SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28243 if (OptLevel == CodeGenOptLevel::None)
28244 return OldChain;
28245
28246 // Ops for replacing token factor.
28247 SmallVector<SDValue, 8> Aliases;
28248
28249 // Accumulate all the aliases to this node.
28250 GatherAllAliases(N, OldChain, Aliases);
28251
28252 // If no operands then chain to entry token.
28253 if (Aliases.empty())
28254 return DAG.getEntryNode();
28255
28256 // If a single operand then chain to it. We don't need to revisit it.
28257 if (Aliases.size() == 1)
28258 return Aliases[0];
28259
28260 // Construct a custom tailored token factor.
28261 return DAG.getTokenFactor(SDLoc(N), Aliases);
28262}
28263
28264// This function tries to collect a bunch of potentially interesting
28265// nodes to improve the chains of, all at once. This might seem
28266// redundant, as this function gets called when visiting every store
28267// node, so why not let the work be done on each store as it's visited?
28268//
28269// I believe this is mainly important because mergeConsecutiveStores
28270// is unable to deal with merging stores of different sizes, so unless
28271// we improve the chains of all the potential candidates up-front
28272// before running mergeConsecutiveStores, it might only see some of
28273// the nodes that will eventually be candidates, and then not be able
28274// to go from a partially-merged state to the desired final
28275// fully-merged state.
28276
28277bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28278 SmallVector<StoreSDNode *, 8> ChainedStores;
28279 StoreSDNode *STChain = St;
28280 // Intervals records which offsets from BaseIndex have been covered. In
28281 // the common case, every store writes to an address adjacent to the previous
28282 // one and is thus merged with the previous interval at insertion time.
28283
28284 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28285 IntervalMapHalfOpenInfo<int64_t>>;
28286 IMap::Allocator A;
28287 IMap Intervals(A);
28288
28289 // This holds the base pointer, index, and the offset in bytes from the base
28290 // pointer.
28291 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28292
28293 // We must have a base and an offset.
28294 if (!BasePtr.getBase().getNode())
28295 return false;
28296
28297 // Do not handle stores to undef base pointers.
28298 if (BasePtr.getBase().isUndef())
28299 return false;
28300
28301 // Do not handle stores to opaque types
28302 if (St->getMemoryVT().isZeroSized())
28303 return false;
28304
28305 // BaseIndexOffset assumes that offsets are fixed-size, which
28306 // is not valid for scalable vectors where the offsets are
28307 // scaled by `vscale`, so bail out early.
28308 if (St->getMemoryVT().isScalableVT())
28309 return false;
28310
28311 // Add ST's interval.
28312 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28313 std::monostate{});
28314
28315 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28316 if (Chain->getMemoryVT().isScalableVector())
28317 return false;
28318
28319 // If the chain has more than one use, then we can't reorder the mem ops.
28320 if (!SDValue(Chain, 0)->hasOneUse())
28321 break;
28322 // TODO: Relax for unordered atomics (see D66309)
28323 if (!Chain->isSimple() || Chain->isIndexed())
28324 break;
28325
28326 // Find the base pointer and offset for this memory node.
28327 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28328 // Check that the base pointer is the same as the original one.
28329 int64_t Offset;
28330 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28331 break;
28332 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28333 // Make sure we don't overlap with other intervals by checking the ones to
28334 // the left or right before inserting.
28335 auto I = Intervals.find(Offset);
28336 // If there's a next interval, we should end before it.
28337 if (I != Intervals.end() && I.start() < (Offset + Length))
28338 break;
28339 // If there's a previous interval, we should start after it.
28340 if (I != Intervals.begin() && (--I).stop() <= Offset)
28341 break;
28342 Intervals.insert(Offset, Offset + Length, std::monostate{});
28343
28344 ChainedStores.push_back(Chain);
28345 STChain = Chain;
28346 }
28347
28348 // If we didn't find a chained store, exit.
28349 if (ChainedStores.empty())
28350 return false;
28351
28352 // Improve all chained stores (St and ChainedStores members) starting from
28353 // where the store chain ended and return single TokenFactor.
28354 SDValue NewChain = STChain->getChain();
28355 SmallVector<SDValue, 8> TFOps;
28356 for (unsigned I = ChainedStores.size(); I;) {
28357 StoreSDNode *S = ChainedStores[--I];
28358 SDValue BetterChain = FindBetterChain(S, NewChain);
28359 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28360 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28361 TFOps.push_back(SDValue(S, 0));
28362 ChainedStores[I] = S;
28363 }
28364
28365 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28366 SDValue BetterChain = FindBetterChain(St, NewChain);
28367 SDValue NewST;
28368 if (St->isTruncatingStore())
28369 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28370 St->getBasePtr(), St->getMemoryVT(),
28371 St->getMemOperand());
28372 else
28373 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28374 St->getBasePtr(), St->getMemOperand());
28375
28376 TFOps.push_back(NewST);
28377
28378 // If we improved every element of TFOps, then we've lost the dependence on
28379 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28380 // the beginning to keep relative order consistent with FindBetterChains.
28381 auto hasImprovedChain = [&](SDValue ST) -> bool {
28382 return ST->getOperand(0) != NewChain;
28383 };
28384 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28385 if (AddNewChain)
28386 TFOps.insert(TFOps.begin(), NewChain);
28387
28388 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28389 CombineTo(St, TF);
28390
28391 // Add TF and its operands to the worklist.
28392 AddToWorklist(TF.getNode());
28393 for (const SDValue &Op : TF->ops())
28394 AddToWorklist(Op.getNode());
28395 AddToWorklist(STChain);
28396 return true;
28397}
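The interval bookkeeping in parallelizeChainedStores rejects any candidate store whose byte range intersects one that is already covered. Below is a minimal sketch of that half-open, non-overlapping interval test using a plain std::map in place of llvm::IntervalMap; the container choice and helper name are illustrative only.

#include <cstdint>
#include <iterator>
#include <map>

// Illustrative only: Covered maps start offset -> end offset of half-open,
// pairwise disjoint byte ranges already claimed by earlier stores. Returns
// false (and inserts nothing) if [Offset, Offset+Length) would overlap.
static bool tryCover(std::map<int64_t, int64_t> &Covered,
                     int64_t Offset, int64_t Length) {
  int64_t End = Offset + Length;
  // The first range starting at or after Offset must begin at or after End.
  auto Next = Covered.lower_bound(Offset);
  if (Next != Covered.end() && Next->first < End)
    return false;
  // The range immediately before Offset must end at or before Offset.
  if (Next != Covered.begin() && std::prev(Next)->second > Offset)
    return false;
  Covered[Offset] = End;
  return true;
}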
28398
28399bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28400 if (OptLevel == CodeGenOptLevel::None)
28401 return false;
28402
28403 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28404
28405 // We must have a base and an offset.
28406 if (!BasePtr.getBase().getNode())
28407 return false;
28408
28409 // Do not handle stores to undef base pointers.
28410 if (BasePtr.getBase().isUndef())
28411 return false;
28412
28413 // Directly improve a chain of disjoint stores starting at St.
28414 if (parallelizeChainedStores(St))
28415 return true;
28416
28417 // Improve St's chain.
28418 SDValue BetterChain = FindBetterChain(St, St->getChain());
28419 if (St->getChain() != BetterChain) {
28420 replaceStoreChain(St, BetterChain);
28421 return true;
28422 }
28423 return false;
28424}
28425
28426/// This is the entry point for the file.
28427 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28428 CodeGenOptLevel OptLevel) {
28429 /// This is the main entry point to this class.
28430 DAGCombiner(*this, AA, OptLevel).Run(Level);
28431}
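For context, a hedged sketch of how this entry point is typically exercised: as the file header notes, the combiner can run both before and after the DAG is legalized. The driver function below is hypothetical; only SelectionDAG::Combine and the CombineLevel enumerators are real API.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/CodeGen.h"

using namespace llvm;

// Illustrative only: invoke the combiner entry point defined above at two
// different combine levels; legalization passes run between the two calls.
static void runCombines(SelectionDAG &DAG, AAResults *AA,
                        CodeGenOptLevel OptLevel) {
  DAG.Combine(BeforeLegalizeTypes, AA, OptLevel);
  // ... type legalization / DAG legalization happen here ...
  DAG.Combine(AfterLegalizeDAG, AA, OptLevel);
}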
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition: APFloat.h:1295
bool isNormal() const
Definition: APFloat.h:1299
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1278
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isLargest() const
Definition: APFloat.h:1311
bool isIEEE() const
Definition: APFloat.h:1313
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:444
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
int32_t exactLogBase2() const
Definition: APInt.h:1725
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
unsigned logBase2() const
Definition: APInt.h:1703
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:307
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:361
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:551
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool isADDLike(SDValue Op) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
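A minimal sketch of how these creation helpers are typically used from a visit routine (the function name and specific folds are illustrative, not the actual visitADD); getNode either reuses an existing identical node or creates a new one:
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Illustrative only: fold (add x, 0) -> x and canonicalize a constant LHS to
// the RHS, rebuilding the node with getNode.
static SDValue visitADDSketch(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // canonicalize constant to the RHS: (add c, x) -> (add x, c)
  if (isa<ConstantSDNode>(N0) && !isa<ConstantSDNode>(N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N1, N0);
  return SDValue();
}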
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:479
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to be speculatively executed given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
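A hedged sketch of the known-bits queries above: assuming N is an ISD::AND with a constant mask (the names below are illustrative), the AND can be dropped when every bit it would clear is already known zero in the other operand:
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// If all bits cleared by the mask are already zero in X, the AND is a no-op.
static SDValue simplifyRedundantAnd(SDNode *N, SelectionDAG &DAG) {
  SDValue X = N->getOperand(0);
  auto *MaskC = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!MaskC)
    return SDValue();
  if (DAG.MaskedValueIsZero(X, ~MaskC->getAPIntValue()))
    return X;
  return SDValue();
}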
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:560
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
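As a small illustration (the helper name is an assumption), getSplat is convenient when a combine needs a splatted constant and does not want to care whether the type is fixed-width or scalable:
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Build an all-ones vector of type VT; getSplat emits BUILD_VECTOR for
// fixed-width types and SPLAT_VECTOR for scalable ones.
static SDValue getAllOnesVectorSketch(SelectionDAG &DAG, const SDLoc &DL,
                                      EVT VT) {
  SDValue ScalarOnes = DAG.getAllOnesConstant(DL, VT.getScalarType());
  return DAG.getSplat(VT, DL, ScalarOnes);
}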
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:908
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
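A sketch of how commuteMask pairs with getVectorShuffle when a combine wants the same shuffle with its inputs swapped (SVN and DAG are assumed to come from the surrounding visit routine):
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Rebuild a shuffle with its operands swapped by commuting the mask.
static SDValue commuteShuffleSketch(ShuffleVectorSDNode *SVN,
                                    SelectionDAG &DAG) {
  SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
  ShuffleVectorSDNode::commuteMask(Mask);
  return DAG.getVectorShuffle(SVN->getValueType(0), SDLoc(SVN),
                              SVN->getOperand(1), SVN->getOperand(0), Mask);
}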
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
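Combines that run after legalization normally guard new node creation behind these hooks; a minimal sketch under assumptions (LegalOperations stands in for DAGCombiner's internal flag recording whether operation legalization has already run):
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

// Decide whether a combine may introduce an ISD::UMIN node of type VT.
static bool canFormUMinSketch(const TargetLowering &TLI, EVT VT,
                              bool LegalOperations) {
  return !LegalOperations || TLI.isOperationLegalOrCustom(ISD::UMIN, VT);
}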
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
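A hedged sketch of driving SimplifyDemandedBits through the signature listed above; the TargetLoweringOpt plumbing and the fixed-width-vector assumption are illustrative, not a copy of DAGCombiner's own wrapper:
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Ask the target to simplify Op given that only its low LowBits bits are used.
// On success the suggested replacement is recorded in TLO (TLO.Old/TLO.New).
static bool demandLowBitsSketch(const TargetLowering &TLI, SelectionDAG &DAG,
                                SDValue Op, unsigned LowBits, bool LegalTypes,
                                bool LegalOps) {
  EVT VT = Op.getValueType();
  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
  APInt DemandedElts = APInt::getAllOnes(NumElts);
  APInt DemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(), LowBits);
  KnownBits Known;
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOps);
  return TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
}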
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent a VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent a VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:217
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:239
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2178
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2183
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2188
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2193
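A trivial sketch composing the APIntOps helpers above (the clamp helper itself is hypothetical, shown only to illustrate the signed variants):
#include "llvm/ADT/APInt.h"

// Clamp a signed value into [Lo, Hi] using the signed min/max helpers.
static llvm::APInt clampSigned(const llvm::APInt &V, const llvm::APInt &Lo,
                               const llvm::APInt &Hi) {
  using namespace llvm::APIntOps;
  return smin(smax(V, Lo), Hi);
}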
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:368
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:270
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:488
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:986
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:374
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:759
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:985
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:360
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1311
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1198
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1014
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:261
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1360
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:280
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:945
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1147
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1019
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:314
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1496
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1606
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
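A small sketch of the hook above: one lambda covers both a scalar ConstantSDNode and every element of a constant BUILD_VECTOR (the helper name and the non-zero check are illustrative):
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// True if Amt is a constant (or constant vector) with no zero element.
static bool isNonZeroShiftAmountSketch(SDValue Amt) {
  return ISD::matchUnaryPredicate(
      Amt, [](ConstantSDNode *C) { return !C->isZero(); });
}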
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1581
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1601
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
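A sketch of the typical use: rebuild a SETCC with the inverted predicate, for example while folding an XOR-with-one of a boolean. getSetCC is the standard SelectionDAG helper for this; the wrapper name below is illustrative:
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Return the same comparison with the inverse condition code.
static SDValue invertSetCCSketch(SDValue SetCC, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  EVT OpVT = SetCC.getOperand(0).getValueType();
  ISD::CondCode InvCC = ISD::getSetCCInverse(CC, OpVT);
  return DAG.getSetCC(SDLoc(SetCC), SetCC.getValueType(), SetCC.getOperand(0),
                      SetCC.getOperand(1), InvCC);
}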
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1422
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:933
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:836
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:553
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:854
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
Or< Preds... > m_AnyOf(Preds &&...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
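A hedged sketch of the SDPatternMatch helpers listed here; the capturing m_Value(SDValue &), m_Add, and a pattern-taking m_OneUse(P) overload are assumed to be available alongside the no-argument forms shown above:
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Match (srl (add x, y), amt) where the add has a single use, capturing the
// operands. Assumes the capturing/one-use matcher overloads noted above.
static bool matchSrlOfOneUseAddSketch(llvm::SDNode *N, llvm::SDValue &X,
                                      llvm::SDValue &Y, llvm::SDValue &Amt) {
  using namespace llvm::SDPatternMatch;
  return sd_match(N, m_Srl(m_OneUse(m_Add(m_Value(X), m_Value(Y))),
                           m_Value(Amt)));
}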
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1525
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
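An illustrative snippet (not from this file) for the STLExtras range helpers referenced above: enumerate, zip, drop_begin and the all_of wrapper.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <tuple>
static void rangeHelpersExample() {
  llvm::SmallVector<int, 4> A = {3, 1, 4, 1};
  llvm::SmallVector<char, 4> B = {'a', 'b', 'c'};
  // enumerate() pairs each element with its running index.
  for (const auto &En : llvm::enumerate(A))
    (void)En.index(), (void)En.value();
  // zip() iterates both ranges in lockstep and stops at the shorter one.
  for (const auto &T : llvm::zip(A, B))
    (void)std::get<0>(T), (void)std::get<1>(T);
  // drop_begin() skips the first element; all_of() takes the whole range.
  (void)llvm::all_of(llvm::drop_begin(A), [](int V) { return V > 0; });
}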
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:889
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1475
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
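A worked example (not from this file) for the mask-scaling helpers narrowShuffleMaskElts and widenShuffleMaskElts referenced above; the two transforms are inverses whenever each group of narrow indices is contiguous and aligned:
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
static void shuffleMaskScalingExample() {
  // <1, 0> on 2 wide elements becomes <2, 3, 0, 1> on 4 narrow elements.
  llvm::SmallVector<int, 8> Narrow;
  llvm::narrowShuffleMaskElts(/*Scale=*/2, {1, 0}, Narrow);
  // Widening succeeds here and recovers the original <1, 0> mask.
  llvm::SmallVector<int, 8> Wide;
  bool Ok = llvm::widenShuffleMaskElts(/*Scale=*/2, Narrow, Wide);
  (void)Ok;
}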
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
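A minimal sketch of how the constant-splat helpers above are typically used in a fold; N is assumed to be an SDNode* as in the visit routines of this file, and the fold body is hypothetical:
// Operand 1 is a constant integer or a splat-of-constant build vector.
if (llvm::ConstantSDNode *C1 =
        llvm::isConstOrConstSplat(N->getOperand(1), /*AllowUndefs=*/true)) {
  if (C1->isOne()) {
    // Same condition that isOneOrOneSplat(N->getOperand(1), true) tests.
  }
}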
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
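Illustrative values (not from this file) for the alignment helpers referenced above (isAligned, commonAlignment, Log2):
#include "llvm/Support/Alignment.h"
static void alignmentExample() {
  llvm::Align A(16);
  (void)llvm::Log2(A);                          // 4
  (void)llvm::isAligned(A, /*SizeInBytes=*/48); // true: 48 is a multiple of 16
  // Alignment still guaranteed after adding an offset of 4 to a 16-byte
  // aligned address is only 4 bytes.
  (void)llvm::commonAlignment(A, /*Offset=*/4); // Align(4)
}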
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
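Illustrative values (not from this file) for the MathExtras helpers referenced above, showing the floor/ceil and greater-or-equal/strictly-greater distinctions:
#include "llvm/Support/MathExtras.h"
#include <cassert>
static void mathExtrasExample() {
  assert(llvm::Log2_32(10) == 3);        // floor log2
  assert(llvm::Log2_32_Ceil(10) == 4);   // ceil log2
  assert(llvm::PowerOf2Ceil(16) == 16);  // >= the input, so 16 stays 16
  assert(llvm::NextPowerOf2(16) == 32);  // strictly greater than the input
  assert(llvm::isPowerOf2_32(64) && !llvm::isPowerOf2_32(0));
}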
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
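An illustrative snippet (not from this file) exercising a few of the EVT queries listed above:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
static void evtExample(llvm::LLVMContext &Ctx) {
  llvm::EVT I32 = llvm::EVT::getIntegerVT(Ctx, 32);
  llvm::EVT V4I32 = llvm::EVT::getVectorVT(Ctx, I32, 4);
  (void)I32.isScalarInteger();             // true
  (void)V4I32.isVector();                  // true
  (void)V4I32.getVectorNumElements();      // 4
  (void)V4I32.getScalarSizeInBits();       // 32
  (void)V4I32.getSizeInBits();             // 128 bits (fixed)
  (void)V4I32.getStoreSize();              // 16 bytes
  (void)I32.bitsEq(V4I32.getScalarType()); // true: both are 32 bits wide
}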
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
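A small sketch (not from this file) of the KnownBits accessors listed above, for an 8-bit value whose bits 2-3 are unknown and all other bits are known zero:
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
static void knownBitsExample() {
  llvm::KnownBits Known(8);
  Known.Zero = llvm::APInt(8, 0xF3); // bits 7..4 and 1..0 known to be zero
  Known.One  = llvm::APInt(8, 0x00); // no bit known to be one
  (void)Known.isNonNegative();         // true: the sign bit is known zero
  (void)Known.countMinTrailingZeros(); // 2
  (void)Known.countMinLeadingZeros();  // 4
  (void)Known.countMaxActiveBits();    // 4: the value fits in 4 bits
  (void)Known.isConstant();            // false: bits 2-3 are still unknown
}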
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
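A minimal sketch of reading and re-applying the SDNodeFlags queries above when building a replacement node; N is assumed to be an SDNode* as elsewhere in this file, and the rewrite itself is left hypothetical:
llvm::SDNodeFlags Flags = N->getFlags();
if (Flags.hasNoUnsignedWrap() && Flags.hasNoSignedWrap()) {
  llvm::SDNodeFlags NewFlags;
  NewFlags.setNoUnsignedWrap(true); // only re-assert what the fold preserves
  // ... pass NewFlags to SelectionDAG::getNode() for the rewritten node ...
}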
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:307
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...