DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
174 SmallVector<SDNode *, 64> Worklist;
175
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
181 DenseMap<SDNode *, unsigned> WorklistMap;
182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. As we do not add duplicate nodes
185 /// to the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
200 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
201
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the worklist because they might now be further simplified.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its users to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
336 APInt DemandedBits = APInt::getAllOnes(BitWidth);
337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
343 ? APInt::getAllOnes(VT.getVectorNumElements())
344 : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns a nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it,
375 // so that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
404 ISD::CondCode CC);
405
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitATOMIC_STORE(SDNode *N);
534 SDValue visitLIFETIME_END(SDNode *N);
535 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
536 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
537 SDValue visitBUILD_VECTOR(SDNode *N);
538 SDValue visitCONCAT_VECTORS(SDNode *N);
539 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_SHUFFLE(SDNode *N);
541 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
542 SDValue visitINSERT_SUBVECTOR(SDNode *N);
543 SDValue visitMLOAD(SDNode *N);
544 SDValue visitMSTORE(SDNode *N);
545 SDValue visitMGATHER(SDNode *N);
546 SDValue visitMSCATTER(SDNode *N);
547 SDValue visitVPGATHER(SDNode *N);
548 SDValue visitVPSCATTER(SDNode *N);
549 SDValue visitVP_STRIDED_LOAD(SDNode *N);
550 SDValue visitVP_STRIDED_STORE(SDNode *N);
551 SDValue visitFP_TO_FP16(SDNode *N);
552 SDValue visitFP16_TO_FP(SDNode *N);
553 SDValue visitFP_TO_BF16(SDNode *N);
554 SDValue visitBF16_TO_FP(SDNode *N);
555 SDValue visitVECREDUCE(SDNode *N);
556 SDValue visitVPOp(SDNode *N);
557 SDValue visitGET_FPENV_MEM(SDNode *N);
558 SDValue visitSET_FPENV_MEM(SDNode *N);
559
560 template <class MatchContextClass>
561 SDValue visitFADDForFMACombine(SDNode *N);
562 template <class MatchContextClass>
563 SDValue visitFSUBForFMACombine(SDNode *N);
564 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
565
566 SDValue XformToShuffleWithZero(SDNode *N);
567 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
568 const SDLoc &DL,
569 SDNode *N,
570 SDValue N0,
571 SDValue N1);
572 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
575 SDValue N1, SDNodeFlags Flags);
576 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
577 EVT VT, SDValue N0, SDValue N1,
578 SDNodeFlags Flags = SDNodeFlags());
579
580 SDValue visitShiftByConstant(SDNode *N);
581
582 SDValue foldSelectOfConstants(SDNode *N);
583 SDValue foldVSelectOfConstants(SDNode *N);
584 SDValue foldBinOpIntoSelect(SDNode *BO);
585 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
586 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
587 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
588 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
589 SDValue N2, SDValue N3, ISD::CondCode CC,
590 bool NotExtCompare = false);
591 SDValue convertSelectOfFPConstantsToLoadOffset(
592 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
593 ISD::CondCode CC);
594 SDValue foldSignChangeInBitcast(SDNode *N);
595 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
596 SDValue N2, SDValue N3, ISD::CondCode CC);
597 SDValue foldSelectOfBinops(SDNode *N);
598 SDValue foldSextSetcc(SDNode *N);
599 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
600 const SDLoc &DL);
601 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
602 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
603 SDValue unfoldMaskedMerge(SDNode *N);
604 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
605 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
606 const SDLoc &DL, bool foldBooleans);
607 SDValue rebuildSetCC(SDValue N);
608
609 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
610 SDValue &CC, bool MatchStrict = false) const;
611 bool isOneUseSetCC(SDValue N) const;
612
613 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
614 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
615
616 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
617 unsigned HiOp);
618 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
619 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
620 const TargetLowering &TLI);
621
622 SDValue CombineExtLoad(SDNode *N);
623 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
624 SDValue combineRepeatedFPDivisors(SDNode *N);
625 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
626 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
628 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
629 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
630 SDValue BuildSDIV(SDNode *N);
631 SDValue BuildSDIVPow2(SDNode *N);
632 SDValue BuildUDIV(SDNode *N);
633 SDValue BuildSREMPow2(SDNode *N);
634 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
635 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
636 bool KnownNeverZero = false,
637 bool InexpensiveOnly = false,
638 std::optional<EVT> OutVT = std::nullopt);
639 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
640 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
642 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
643 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
646 SDNodeFlags Flags, bool Reciprocal);
647 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
648 bool DemandHighBits = true);
649 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
650 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
651 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
652 unsigned PosOpcode, unsigned NegOpcode,
653 const SDLoc &DL);
654 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
655 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
656 unsigned PosOpcode, unsigned NegOpcode,
657 const SDLoc &DL);
658 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
659 SDValue MatchLoadCombine(SDNode *N);
660 SDValue mergeTruncStores(StoreSDNode *N);
661 SDValue reduceLoadWidth(SDNode *N);
662 SDValue ReduceLoadOpStoreWidth(SDNode *N);
663 SDValue splitMergedValStore(StoreSDNode *ST);
664 SDValue TransformFPLoadStorePair(SDNode *N);
665 SDValue convertBuildVecZextToZext(SDNode *N);
666 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
667 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
668 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
669 SDValue reduceBuildVecToShuffle(SDNode *N);
670 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
671 ArrayRef<int> VectorMask, SDValue VecIn1,
672 SDValue VecIn2, unsigned LeftIdx,
673 bool DidSplitVec);
674 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
675
676 /// Walk up chain skipping non-aliasing memory nodes,
677 /// looking for aliasing nodes and adding them to the Aliases vector.
678 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
679 SmallVectorImpl<SDValue> &Aliases);
680
681 /// Return true if there is any possibility that the two addresses overlap.
682 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
683
684 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
685 /// chain (aliasing node.)
686 SDValue FindBetterChain(SDNode *N, SDValue Chain);
687
688 /// Try to replace a store and any possibly adjacent stores on
689 /// consecutive chains with better chains. Return true only if St is
690 /// replaced.
691 ///
692 /// Notice that other chains may still be replaced even if the function
693 /// returns false.
694 bool findBetterNeighborChains(StoreSDNode *St);
695
696 // Helper for findBetterNeighborChains. Walk up store chain add additional
697 // chained stores that do not overlap and can be parallelized.
698 bool parallelizeChainedStores(StoreSDNode *St);
699
700 /// Holds a pointer to an LSBaseSDNode as well as information on where it
701 /// is located in a sequence of memory operations connected by a chain.
702 struct MemOpLink {
703 // Ptr to the mem node.
704 LSBaseSDNode *MemNode;
705
706 // Offset from the base ptr.
707 int64_t OffsetFromBase;
708
709 MemOpLink(LSBaseSDNode *N, int64_t Offset)
710 : MemNode(N), OffsetFromBase(Offset) {}
711 };
712
713 // Classify the origin of a stored value.
714 enum class StoreSource { Unknown, Constant, Extract, Load };
715 StoreSource getStoreSource(SDValue StoreVal) {
716 switch (StoreVal.getOpcode()) {
717 case ISD::Constant:
718 case ISD::ConstantFP:
719 return StoreSource::Constant;
720 case ISD::BUILD_VECTOR:
721 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
722 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
723 return StoreSource::Constant;
724 return StoreSource::Unknown;
725 case ISD::EXTRACT_VECTOR_ELT:
726 case ISD::EXTRACT_SUBVECTOR:
727 return StoreSource::Extract;
728 case ISD::LOAD:
729 return StoreSource::Load;
730 default:
731 return StoreSource::Unknown;
732 }
733 }
734
735 /// This is a helper function for visitMUL to check the profitability
736 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
737 /// MulNode is the original multiply, AddNode is (add x, c1),
738 /// and ConstNode is c2.
739 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
740 SDValue ConstNode);
741
742 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
743 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
744 /// the type of the loaded value to be extended.
745 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
746 EVT LoadResultTy, EVT &ExtVT);
747
748 /// Helper function to calculate whether the given Load/Store can have its
749 /// width reduced to ExtVT.
750 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
751 EVT &MemVT, unsigned ShAmt = 0);
752
753 /// Used by BackwardsPropagateMask to find suitable loads.
754 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
755 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
756 ConstantSDNode *Mask, SDNode *&NodeToMask);
757 /// Attempt to propagate a given AND node back to load leaves so that they
758 /// can be combined into narrow loads.
759 bool BackwardsPropagateMask(SDNode *N);
760
761 /// Helper function for mergeConsecutiveStores which merges the component
762 /// store chains.
763 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
764 unsigned NumStores);
765
766 /// Helper function for mergeConsecutiveStores which checks if all the store
767 /// nodes have the same underlying object. We can still reuse the first
768 /// store's pointer info if all the stores are from the same object.
769 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
770
771 /// This is a helper function for mergeConsecutiveStores. When the source
772 /// elements of the consecutive stores are all constants or all extracted
773 /// vector elements, try to merge them into one larger store introducing
774 /// bitcasts if necessary. \return True if a merged store was created.
775 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
776 EVT MemVT, unsigned NumStores,
777 bool IsConstantSrc, bool UseVector,
778 bool UseTrunc);
779
780 /// This is a helper function for mergeConsecutiveStores. Stores that
781 /// potentially may be merged with St are placed in StoreNodes. RootNode is
782 /// a chain predecessor to all store candidates.
783 void getStoreMergeCandidates(StoreSDNode *St,
784 SmallVectorImpl<MemOpLink> &StoreNodes,
785 SDNode *&Root);
786
787 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
788 /// have indirect dependency through their operands. RootNode is the
789 /// predecessor to all stores calculated by getStoreMergeCandidates and is
790 /// used to prune the dependency check. \return True if safe to merge.
791 bool checkMergeStoreCandidatesForDependencies(
792 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
793 SDNode *RootNode);
794
795 /// This is a helper function for mergeConsecutiveStores. Given a list of
796 /// store candidates, find the first N that are consecutive in memory.
797 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
798 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
799 int64_t ElementSizeBytes) const;
800
801 /// This is a helper function for mergeConsecutiveStores. It is used for
802 /// store chains that are composed entirely of constant values.
803 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
804 unsigned NumConsecutiveStores,
805 EVT MemVT, SDNode *Root, bool AllowVectors);
806
807 /// This is a helper function for mergeConsecutiveStores. It is used for
808 /// store chains that are composed entirely of extracted vector elements.
809 /// When extracting multiple vector elements, try to store them in one
810 /// vector store rather than a sequence of scalar stores.
811 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
812 unsigned NumConsecutiveStores, EVT MemVT,
813 SDNode *Root);
814
815 /// This is a helper function for mergeConsecutiveStores. It is used for
816 /// store chains that are composed entirely of loaded values.
817 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
818 unsigned NumConsecutiveStores, EVT MemVT,
819 SDNode *Root, bool AllowVectors,
820 bool IsNonTemporalStore, bool IsNonTemporalLoad);
821
822 /// Merge consecutive store operations into a wide store.
823 /// This optimization uses wide integers or vectors when possible.
824 /// \return true if stores were merged.
825 bool mergeConsecutiveStores(StoreSDNode *St);
826
827 /// Try to transform a truncation where C is a constant:
828 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
829 ///
830 /// \p N needs to be a truncation and its first operand an AND. Other
831 /// requirements are checked by the function (e.g. that trunc is
832 /// single-use) and if missed an empty SDValue is returned.
833 SDValue distributeTruncateThroughAnd(SDNode *N);
834
835 /// Helper function to determine whether the target supports operation
836 /// given by \p Opcode for type \p VT, that is, whether the operation
837 /// is legal or custom before legalizing operations, and whether is
838 /// legal (but not custom) after legalization.
839 bool hasOperation(unsigned Opcode, EVT VT) {
840 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
841 }
842
843 public:
844 /// Runs the dag combiner on all nodes in the work list
845 void Run(CombineLevel AtLevel);
846
847 SelectionDAG &getDAG() const { return DAG; }
848
849 /// Returns a type large enough to hold any valid shift amount - before type
850 /// legalization these can be huge.
851 EVT getShiftAmountTy(EVT LHSTy) {
852 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
853 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
854 }
855
856 /// This method returns true if we are running before type legalization or
857 /// if the specified VT is legal.
858 bool isTypeLegal(const EVT &VT) {
859 if (!LegalTypes) return true;
860 return TLI.isTypeLegal(VT);
861 }
862
863 /// Convenience wrapper around TargetLowering::getSetCCResultType
864 EVT getSetCCResultType(EVT VT) const {
865 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
866 }
867
868 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
869 SDValue OrigLoad, SDValue ExtLoad,
870 ISD::NodeType ExtType);
871 };
872
873/// This class is a DAGUpdateListener that removes any deleted
874/// nodes from the worklist.
875class WorklistRemover : public SelectionDAG::DAGUpdateListener {
876 DAGCombiner &DC;
877
878public:
879 explicit WorklistRemover(DAGCombiner &dc)
880 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
881
882 void NodeDeleted(SDNode *N, SDNode *E) override {
883 DC.removeFromWorklist(N);
884 }
885};
886
887class WorklistInserter : public SelectionDAG::DAGUpdateListener {
888 DAGCombiner &DC;
889
890public:
891 explicit WorklistInserter(DAGCombiner &dc)
892 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
893
894 // FIXME: Ideally we could add N to the worklist, but this causes exponential
895 // compile time costs in large DAGs, e.g. Halide.
896 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
897};
898
899} // end anonymous namespace
900
901//===----------------------------------------------------------------------===//
902// TargetLowering::DAGCombinerInfo implementation
903//===----------------------------------------------------------------------===//
904
905void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
906 ((DAGCombiner*)DC)->AddToWorklist(N);
907}
908
909SDValue TargetLowering::DAGCombinerInfo::
910CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
911 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
912}
913
914SDValue TargetLowering::DAGCombinerInfo::
915CombineTo(SDNode *N, SDValue Res, bool AddTo) {
916 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
917}
918
919SDValue TargetLowering::DAGCombinerInfo::
920CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
921 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
922}
923
924bool TargetLowering::DAGCombinerInfo::
925recursivelyDeleteUnusedNodes(SDNode *N) {
926 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
927}
928
929void TargetLowering::DAGCombinerInfo::
930CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
931 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
932}
933
934//===----------------------------------------------------------------------===//
935// Helper Functions
936//===----------------------------------------------------------------------===//
937
938void DAGCombiner::deleteAndRecombine(SDNode *N) {
939 removeFromWorklist(N);
940
941 // If the operands of this node are only used by the node, they will now be
942 // dead. Make sure to re-visit them and recursively delete dead nodes.
943 for (const SDValue &Op : N->ops())
944 // For an operand generating multiple values, one of the values may
945 // become dead allowing further simplification (e.g. split index
946 // arithmetic from an indexed load).
947 if (Op->hasOneUse() || Op->getNumValues() > 1)
948 AddToWorklist(Op.getNode());
949
950 DAG.DeleteNode(N);
951}
952
953// APInts must be the same size for most operations; this helper
954// function zero-extends the shorter of the pair so that they match.
955// We provide an Offset so that we can create bitwidths that won't overflow.
956static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
957 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
958 LHS = LHS.zext(Bits);
959 RHS = RHS.zext(Bits);
960}
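// Illustrative example (editorial note, not from the original source): with
// LHS = APInt(4, 9), RHS = APInt(8, 200) and Offset = 1, both values are
// zero-extended to 9 bits, so a subsequent addition of the pair cannot wrap.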
961
962// Return true if this node is a setcc, or is a select_cc
963// that selects between the target values used for true and false, making it
964// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
965// the appropriate nodes based on the type of node we are checking. This
966// simplifies life a bit for the callers.
967bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
968 SDValue &CC, bool MatchStrict) const {
969 if (N.getOpcode() == ISD::SETCC) {
970 LHS = N.getOperand(0);
971 RHS = N.getOperand(1);
972 CC = N.getOperand(2);
973 return true;
974 }
975
976 if (MatchStrict &&
977 (N.getOpcode() == ISD::STRICT_FSETCC ||
978 N.getOpcode() == ISD::STRICT_FSETCCS)) {
979 LHS = N.getOperand(1);
980 RHS = N.getOperand(2);
981 CC = N.getOperand(3);
982 return true;
983 }
984
985 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
986 !TLI.isConstFalseVal(N.getOperand(3)))
987 return false;
988
989 if (TLI.getBooleanContents(N.getValueType()) ==
990 TargetLowering::UndefinedBooleanContent)
991 return false;
992
993 LHS = N.getOperand(0);
994 RHS = N.getOperand(1);
995 CC = N.getOperand(4);
996 return true;
997}
998
999/// Return true if this is a SetCC-equivalent operation with only one use.
1000/// If this is true, it allows the users to invert the operation for free when
1001/// it is profitable to do so.
1002bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1003 SDValue N0, N1, N2;
1004 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1005 return true;
1006 return false;
1007}
1008
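// Descriptive note for the helper below: returns true when N is a constant
// splat (or splat/build vector) whose value is exactly the all-ones mask for
// the given scalar type, i.e. 0xFF for i8, 0xFFFF for i16, and 0xFFFFFFFF for
// i32; all other scalar types return false.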
1009static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1010 if (!ScalarTy.isSimple())
1011 return false;
1012
1013 uint64_t MaskForTy = 0ULL;
1014 switch (ScalarTy.getSimpleVT().SimpleTy) {
1015 case MVT::i8:
1016 MaskForTy = 0xFFULL;
1017 break;
1018 case MVT::i16:
1019 MaskForTy = 0xFFFFULL;
1020 break;
1021 case MVT::i32:
1022 MaskForTy = 0xFFFFFFFFULL;
1023 break;
1024 default:
1025 return false;
1026 break;
1027 }
1028
1029 APInt Val;
1030 if (ISD::isConstantSplatVector(N, Val))
1031 return Val.getLimitedValue() == MaskForTy;
1032
1033 return false;
1034}
1035
1036// Determines if it is a constant integer or a splat/build vector of constant
1037// integers (and undefs).
1038// Do not permit build vector implicit truncation.
1039static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1040 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1041 return !(Const->isOpaque() && NoOpaques);
1042 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1043 return false;
1044 unsigned BitWidth = N.getScalarValueSizeInBits();
1045 for (const SDValue &Op : N->op_values()) {
1046 if (Op.isUndef())
1047 continue;
1048 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1049 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1050 (Const->isOpaque() && NoOpaques))
1051 return false;
1052 }
1053 return true;
1054}
1055
1056// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1057// undefs.
1058static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1059 if (V.getOpcode() != ISD::BUILD_VECTOR)
1060 return false;
1061 return isConstantOrConstantVector(V, NoOpaques) ||
1062 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1063}
1064
1065// Determine if this is an indexed load with an opaque target constant index.
1066static bool canSplitIdx(LoadSDNode *LD) {
1067 return MaySplitLoadIndex &&
1068 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1069 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1070}
1071
1072bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1073 const SDLoc &DL,
1074 SDNode *N,
1075 SDValue N0,
1076 SDValue N1) {
1077 // Currently this only tries to ensure we don't undo the GEP splits done by
1078 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1079 // we check if the following transformation would be problematic:
1080 // (load/store (add, (add, x, offset1), offset2)) ->
1081 // (load/store (add, x, offset1+offset2)).
1082
1083 // (load/store (add, (add, x, y), offset2)) ->
1084 // (load/store (add, (add, x, offset2), y)).
1085
1086 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1087 return false;
1088
1089 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1090 if (!C2)
1091 return false;
1092
1093 const APInt &C2APIntVal = C2->getAPIntValue();
1094 if (C2APIntVal.getSignificantBits() > 64)
1095 return false;
1096
1097 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1098 if (N0.hasOneUse())
1099 return false;
1100
1101 const APInt &C1APIntVal = C1->getAPIntValue();
1102 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1103 if (CombinedValueIntVal.getSignificantBits() > 64)
1104 return false;
1105 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1106
1107 for (SDNode *Node : N->uses()) {
1108 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1109 // Is x[offset2] already not a legal addressing mode? If so then
1110 // reassociating the constants breaks nothing (we test offset2 because
1111 // that's the one we hope to fold into the load or store).
1112 TargetLoweringBase::AddrMode AM;
1113 AM.HasBaseReg = true;
1114 AM.BaseOffs = C2APIntVal.getSExtValue();
1115 EVT VT = LoadStore->getMemoryVT();
1116 unsigned AS = LoadStore->getAddressSpace();
1117 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1118 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1119 continue;
1120
1121 // Would x[offset1+offset2] still be a legal addressing mode?
1122 AM.BaseOffs = CombinedValue;
1123 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1124 return true;
1125 }
1126 }
1127 } else {
1128 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1129 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1130 return false;
1131
1132 for (SDNode *Node : N->uses()) {
1133 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1134 if (!LoadStore)
1135 return false;
1136
1137 // Is x[offset2] a legal addressing mode? If so then
1138 // reassociating the constants breaks the address pattern.
1139 TargetLoweringBase::AddrMode AM;
1140 AM.HasBaseReg = true;
1141 AM.BaseOffs = C2APIntVal.getSExtValue();
1142 EVT VT = LoadStore->getMemoryVT();
1143 unsigned AS = LoadStore->getAddressSpace();
1144 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1145 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1146 return false;
1147 }
1148 return true;
1149 }
1150
1151 return false;
1152}
1153
1154/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1155/// \p N0 is the same kind of operation as \p Opc.
1156SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1157 SDValue N0, SDValue N1,
1158 SDNodeFlags Flags) {
1159 EVT VT = N0.getValueType();
1160
1161 if (N0.getOpcode() != Opc)
1162 return SDValue();
1163
1164 SDValue N00 = N0.getOperand(0);
1165 SDValue N01 = N0.getOperand(1);
1166
1167 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1168 SDNodeFlags NewFlags;
1169 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1170 Flags.hasNoUnsignedWrap())
1171 NewFlags.setNoUnsignedWrap(true);
1172
1174 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1175 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1176 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1177 return SDValue();
1178 }
1179 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1180 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1181 // iff (op x, c1) has one use
1182 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1183 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1184 }
1185 }
1186
1187 // Check for repeated operand logic simplifications.
1188 if (Opc == ISD::AND || Opc == ISD::OR) {
1189 // (N00 & N01) & N00 --> N00 & N01
1190 // (N00 & N01) & N01 --> N00 & N01
1191 // (N00 | N01) | N00 --> N00 | N01
1192 // (N00 | N01) | N01 --> N00 | N01
1193 if (N1 == N00 || N1 == N01)
1194 return N0;
1195 }
1196 if (Opc == ISD::XOR) {
1197 // (N00 ^ N01) ^ N00 --> N01
1198 if (N1 == N00)
1199 return N01;
1200 // (N00 ^ N01) ^ N01 --> N00
1201 if (N1 == N01)
1202 return N00;
1203 }
1204
1205 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1206 if (N1 != N01) {
1207 // Reassociate if (op N00, N1) already exist
1208 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1209 // If Op (Op N00, N1), N01 already exists,
1210 // we need to stop reassociating to avoid an infinite loop.
1211 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1212 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1213 }
1214 }
1215
1216 if (N1 != N00) {
1217 // Reassociate if (op N01, N1) already exist
1218 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1219 // If Op (Op N01, N1), N00 already exists,
1220 // we need to stop reassociating to avoid an infinite loop.
1221 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1222 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1223 }
1224 }
1225
1226 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1227 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1228 // predicate, or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1229 // comparisons with the same predicate. This enables optimizations such as
1230 // the following one:
1231 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1232 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1233 if (Opc == ISD::AND || Opc == ISD::OR) {
1234 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1235 N01->getOpcode() == ISD::SETCC) {
1236 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1237 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1238 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1239 if (CC1 == CC00 && CC1 != CC01) {
1240 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1241 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1242 }
1243 if (CC1 == CC01 && CC1 != CC00) {
1244 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1245 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1246 }
1247 }
1248 }
1249 }
1250
1251 return SDValue();
1252}
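// Illustrative example of the constant reassociation above (editorial note,
// not from the original source): (add (add x, 3), 5) folds the inner constants
// and becomes (add x, 8); the nuw flag is preserved only when both adds
// carried it.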
1253
1254/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1255/// same kind of operation as \p Opc.
1256SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1257 SDValue N1, SDNodeFlags Flags) {
1258 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1259
1260 // Floating-point reassociation is not allowed without loose FP math.
1261 if (N0.getValueType().isFloatingPoint() ||
1262 N1.getValueType().isFloatingPoint())
1263 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1264 return SDValue();
1265
1266 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1267 return Combined;
1268 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1269 return Combined;
1270 return SDValue();
1271}
1272
1273// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1274// Note that we only expect Flags to be passed from FP operations. For integer
1275// operations they need to be dropped.
1276SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1277 const SDLoc &DL, EVT VT, SDValue N0,
1278 SDValue N1, SDNodeFlags Flags) {
1279 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1280 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1281 N0->hasOneUse() && N1->hasOneUse() &&
1282 TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
1283 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1284 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1285 return DAG.getNode(RedOpc, DL, VT,
1286 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1287 N0.getOperand(0), N1.getOperand(0)));
1288 }
1289 return SDValue();
1290}
1291
1292SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1293 bool AddTo) {
1294 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1295 ++NodesCombined;
1296 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1297 To[0].dump(&DAG);
1298 dbgs() << " and " << NumTo - 1 << " other values\n");
1299 for (unsigned i = 0, e = NumTo; i != e; ++i)
1300 assert((!To[i].getNode() ||
1301 N->getValueType(i) == To[i].getValueType()) &&
1302 "Cannot combine value to value of different type!");
1303
1304 WorklistRemover DeadNodes(*this);
1305 DAG.ReplaceAllUsesWith(N, To);
1306 if (AddTo) {
1307 // Push the new nodes and any users onto the worklist
1308 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1309 if (To[i].getNode())
1310 AddToWorklistWithUsers(To[i].getNode());
1311 }
1312 }
1313
1314 // Finally, if the node is now dead, remove it from the graph. The node
1315 // may not be dead if the replacement process recursively simplified to
1316 // something else needing this node.
1317 if (N->use_empty())
1318 deleteAndRecombine(N);
1319 return SDValue(N, 0);
1320}
1321
1322void DAGCombiner::
1323CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1324 // Replace the old value with the new one.
1325 ++NodesCombined;
1326 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1327 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1328
1329 // Replace all uses.
1330 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1331
1332 // Push the new node and any (possibly new) users onto the worklist.
1333 AddToWorklistWithUsers(TLO.New.getNode());
1334
1335 // Finally, if the node is now dead, remove it from the graph.
1336 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1337}
1338
1339/// Check the specified integer node value to see if it can be simplified or if
1340/// things it uses can be simplified by bit propagation. If so, return true.
1341bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1342 const APInt &DemandedElts,
1343 bool AssumeSingleUse) {
1344 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1345 KnownBits Known;
1346 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1347 AssumeSingleUse))
1348 return false;
1349
1350 // Revisit the node.
1351 AddToWorklist(Op.getNode());
1352
1353 CommitTargetLoweringOpt(TLO);
1354 return true;
1355}
1356
1357/// Check the specified vector node value to see if it can be simplified or
1358/// if things it uses can be simplified as it only uses some of the elements.
1359/// If so, return true.
1360bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1361 const APInt &DemandedElts,
1362 bool AssumeSingleUse) {
1363 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1364 APInt KnownUndef, KnownZero;
1365 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1366 TLO, 0, AssumeSingleUse))
1367 return false;
1368
1369 // Revisit the node.
1370 AddToWorklist(Op.getNode());
1371
1372 CommitTargetLoweringOpt(TLO);
1373 return true;
1374}
1375
1376void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1377 SDLoc DL(Load);
1378 EVT VT = Load->getValueType(0);
1379 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1380
1381 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1382 Trunc.dump(&DAG); dbgs() << '\n');
1383
1384 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1385 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1386
1387 AddToWorklist(Trunc.getNode());
1388 recursivelyDeleteUnusedNodes(Load);
1389}
1390
1391SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1392 Replace = false;
1393 SDLoc DL(Op);
1394 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1395 LoadSDNode *LD = cast<LoadSDNode>(Op);
1396 EVT MemVT = LD->getMemoryVT();
1397 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1398 : LD->getExtensionType();
1399 Replace = true;
1400 return DAG.getExtLoad(ExtType, DL, PVT,
1401 LD->getChain(), LD->getBasePtr(),
1402 MemVT, LD->getMemOperand());
1403 }
1404
1405 unsigned Opc = Op.getOpcode();
1406 switch (Opc) {
1407 default: break;
1408 case ISD::AssertSext:
1409 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1410 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1411 break;
1412 case ISD::AssertZext:
1413 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1414 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1415 break;
1416 case ISD::Constant: {
1417 unsigned ExtOpc =
1418 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1419 return DAG.getNode(ExtOpc, DL, PVT, Op);
1420 }
1421 }
1422
1423 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1424 return SDValue();
1425 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1426}
1427
1428SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1429 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1430 return SDValue();
1431 EVT OldVT = Op.getValueType();
1432 SDLoc DL(Op);
1433 bool Replace = false;
1434 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1435 if (!NewOp.getNode())
1436 return SDValue();
1437 AddToWorklist(NewOp.getNode());
1438
1439 if (Replace)
1440 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1441 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1442 DAG.getValueType(OldVT));
1443}
1444
1445SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1446 EVT OldVT = Op.getValueType();
1447 SDLoc DL(Op);
1448 bool Replace = false;
1449 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1450 if (!NewOp.getNode())
1451 return SDValue();
1452 AddToWorklist(NewOp.getNode());
1453
1454 if (Replace)
1455 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1456 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1457}
1458
1459/// Promote the specified integer binary operation if the target indicates it is
1460/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1461/// i32 since i16 instructions are longer.
1462SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1463 if (!LegalOperations)
1464 return SDValue();
1465
1466 EVT VT = Op.getValueType();
1467 if (VT.isVector() || !VT.isInteger())
1468 return SDValue();
1469
1470 // If operation type is 'undesirable', e.g. i16 on x86, consider
1471 // promoting it.
1472 unsigned Opc = Op.getOpcode();
1473 if (TLI.isTypeDesirableForOp(Opc, VT))
1474 return SDValue();
1475
1476 EVT PVT = VT;
1477 // Consult target whether it is a good idea to promote this operation and
1478 // what's the right type to promote it to.
1479 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1480 assert(PVT != VT && "Don't know what type to promote to!");
1481
1482 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1483
1484 bool Replace0 = false;
1485 SDValue N0 = Op.getOperand(0);
1486 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1487
1488 bool Replace1 = false;
1489 SDValue N1 = Op.getOperand(1);
1490 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1491 SDLoc DL(Op);
1492
1493 SDValue RV =
1494 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1495
1496 // We are always replacing N0/N1's use in N and only need additional
1497 // replacements if there are additional uses.
1498 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1499 // (SDValue) here because the node may reference multiple values
1500 // (for example, the chain value of a load node).
1501 Replace0 &= !N0->hasOneUse();
1502 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1503
1504 // Combine Op here so it is preserved past replacements.
1505 CombineTo(Op.getNode(), RV);
1506
1507 // If operands have a use ordering, make sure we deal with
1508 // predecessor first.
1509 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1510 std::swap(N0, N1);
1511 std::swap(NN0, NN1);
1512 }
1513
1514 if (Replace0) {
1515 AddToWorklist(NN0.getNode());
1516 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1517 }
1518 if (Replace1) {
1519 AddToWorklist(NN1.getNode());
1520 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1521 }
1522 return Op;
1523 }
1524 return SDValue();
1525}
1526
1527/// Promote the specified integer shift operation if the target indicates it is
1528/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1529/// i32 since i16 instructions are longer.
1530SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1531 if (!LegalOperations)
1532 return SDValue();
1533
1534 EVT VT = Op.getValueType();
1535 if (VT.isVector() || !VT.isInteger())
1536 return SDValue();
1537
1538 // If operation type is 'undesirable', e.g. i16 on x86, consider
1539 // promoting it.
1540 unsigned Opc = Op.getOpcode();
1541 if (TLI.isTypeDesirableForOp(Opc, VT))
1542 return SDValue();
1543
1544 EVT PVT = VT;
1545 // Consult target whether it is a good idea to promote this operation and
1546 // what's the right type to promote it to.
1547 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1548 assert(PVT != VT && "Don't know what type to promote to!");
1549
1550 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1551
1552 bool Replace = false;
1553 SDValue N0 = Op.getOperand(0);
1554 if (Opc == ISD::SRA)
1555 N0 = SExtPromoteOperand(N0, PVT);
1556 else if (Opc == ISD::SRL)
1557 N0 = ZExtPromoteOperand(N0, PVT);
1558 else
1559 N0 = PromoteOperand(N0, PVT, Replace);
1560
1561 if (!N0.getNode())
1562 return SDValue();
1563
1564 SDLoc DL(Op);
1565 SDValue N1 = Op.getOperand(1);
1566 SDValue RV =
1567 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1568
1569 if (Replace)
1570 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1571
1572 // Deal with Op being deleted.
1573 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1574 return RV;
1575 }
1576 return SDValue();
1577}
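// Editorial note (not part of the LLVM source): a sketch of the shift case,
// assuming i16 -> i32 promotion. For a logical right shift the promoted
// operand is re-zero-extended in the wide type first:
//   (i16 srl x, c)
//     --> (i16 trunc (i32 srl (i32 and (i32 any_extend x), 0xFFFF), c))
// For ISD::SRA the operand instead goes through SExtPromoteOperand, i.e. a
// sign_extend_inreg in the wide type, so the shifted-in bits stay correct.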
1578
1579SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1580 if (!LegalOperations)
1581 return SDValue();
1582
1583 EVT VT = Op.getValueType();
1584 if (VT.isVector() || !VT.isInteger())
1585 return SDValue();
1586
1587 // If operation type is 'undesirable', e.g. i16 on x86, consider
1588 // promoting it.
1589 unsigned Opc = Op.getOpcode();
1590 if (TLI.isTypeDesirableForOp(Opc, VT))
1591 return SDValue();
1592
1593 EVT PVT = VT;
1594 // Consult target whether it is a good idea to promote this operation and
1595 // what's the right type to promote it to.
1596 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1597 assert(PVT != VT && "Don't know what type to promote to!");
1598 // fold (aext (aext x)) -> (aext x)
1599 // fold (aext (zext x)) -> (zext x)
1600 // fold (aext (sext x)) -> (sext x)
1601 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1602 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1603 }
1604 return SDValue();
1605}
1606
1607bool DAGCombiner::PromoteLoad(SDValue Op) {
1608 if (!LegalOperations)
1609 return false;
1610
1611 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1612 return false;
1613
1614 EVT VT = Op.getValueType();
1615 if (VT.isVector() || !VT.isInteger())
1616 return false;
1617
1618 // If operation type is 'undesirable', e.g. i16 on x86, consider
1619 // promoting it.
1620 unsigned Opc = Op.getOpcode();
1621 if (TLI.isTypeDesirableForOp(Opc, VT))
1622 return false;
1623
1624 EVT PVT = VT;
1625 // Consult target whether it is a good idea to promote this operation and
1626 // what's the right type to promote it to.
1627 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1628 assert(PVT != VT && "Don't know what type to promote to!");
1629
1630 SDLoc DL(Op);
1631 SDNode *N = Op.getNode();
1632 LoadSDNode *LD = cast<LoadSDNode>(N);
1633 EVT MemVT = LD->getMemoryVT();
1634 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1635 : LD->getExtensionType();
1636 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1637 LD->getChain(), LD->getBasePtr(),
1638 MemVT, LD->getMemOperand());
1639 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1640
1641 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1642 Result.dump(&DAG); dbgs() << '\n');
1643
1644 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1645 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1646
1647 AddToWorklist(Result.getNode());
1648 recursivelyDeleteUnusedNodes(N);
1649 return true;
1650 }
1651
1652 return false;
1653}
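// Editorial note (not part of the LLVM source): a sketch of the load case,
// assuming an undesirable i16 load promoted to i32:
//   (i16,ch) = load chain, ptr
//     --> (i32,ch) = extload [anyext from i16] chain, ptr
//         (i16)    = truncate of the new load's value
// Both the value (result 0) and the chain (result 1) of the original load are
// replaced, and the now-dead node is recursively deleted.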
1654
1655/// Recursively delete a node which has no uses and any operands for
1656/// which it is the only use.
1657///
1658/// Note that this both deletes the nodes and removes them from the worklist.
1659 /// It also adds any nodes which have had a user deleted to the worklist, as they
1660 /// may now have only one use and be subject to other combines.
1661bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1662 if (!N->use_empty())
1663 return false;
1664
1665 SmallSetVector<SDNode *, 16> Nodes;
1666 Nodes.insert(N);
1667 do {
1668 N = Nodes.pop_back_val();
1669 if (!N)
1670 continue;
1671
1672 if (N->use_empty()) {
1673 for (const SDValue &ChildN : N->op_values())
1674 Nodes.insert(ChildN.getNode());
1675
1676 removeFromWorklist(N);
1677 DAG.DeleteNode(N);
1678 } else {
1679 AddToWorklist(N);
1680 }
1681 } while (!Nodes.empty());
1682 return true;
1683}
1684
1685//===----------------------------------------------------------------------===//
1686// Main DAG Combiner implementation
1687//===----------------------------------------------------------------------===//
1688
1689void DAGCombiner::Run(CombineLevel AtLevel) {
1690 // set the instance variables, so that the various visit routines may use them.
1691 Level = AtLevel;
1692 LegalDAG = Level >= AfterLegalizeDAG;
1693 LegalOperations = Level >= AfterLegalizeVectorOps;
1694 LegalTypes = Level >= AfterLegalizeTypes;
1695
1696 WorklistInserter AddNodes(*this);
1697
1698 // Add all the dag nodes to the worklist.
1699 //
1700 // Note: Not all nodes are added to the PruningList here because the only
1701 // nodes which can be deleted are those which have no uses, and all other nodes
1702 // which would otherwise be added to the worklist by the first call to
1703 // getNextWorklistEntry are already present in it.
1704 for (SDNode &Node : DAG.allnodes())
1705 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1706
1707 // Create a dummy node (which is not added to allnodes), that adds a reference
1708 // to the root node, preventing it from being deleted, and tracking any
1709 // changes of the root.
1710 HandleSDNode Dummy(DAG.getRoot());
1711
1712 // While we have a valid worklist entry node, try to combine it.
1713 while (SDNode *N = getNextWorklistEntry()) {
1714 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1715 // N is deleted from the DAG, since they too may now be dead or may have a
1716 // reduced number of uses, allowing other xforms.
1717 if (recursivelyDeleteUnusedNodes(N))
1718 continue;
1719
1720 WorklistRemover DeadNodes(*this);
1721
1722 // If this combine is running after legalizing the DAG, re-legalize any
1723 // nodes pulled off the worklist.
1724 if (LegalDAG) {
1725 SmallSetVector<SDNode *, 16> UpdatedNodes;
1726 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1727
1728 for (SDNode *LN : UpdatedNodes)
1729 AddToWorklistWithUsers(LN);
1730
1731 if (!NIsValid)
1732 continue;
1733 }
1734
1735 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1736
1737 // Add any operands of the new node which have not yet been combined to the
1738 // worklist as well. Because the worklist uniques things already, this
1739 // won't repeatedly process the same operand.
1740 for (const SDValue &ChildN : N->op_values())
1741 if (!CombinedNodes.count(ChildN.getNode()))
1742 AddToWorklist(ChildN.getNode());
1743
1744 CombinedNodes.insert(N);
1745 SDValue RV = combine(N);
1746
1747 if (!RV.getNode())
1748 continue;
1749
1750 ++NodesCombined;
1751
1752 // If we get back the same node we passed in, rather than a new node or
1753 // zero, we know that the node must have defined multiple values and
1754 // CombineTo was used. Since CombineTo takes care of the worklist
1755 // mechanics for us, we have no work to do in this case.
1756 if (RV.getNode() == N)
1757 continue;
1758
1759 assert(N->getOpcode() != ISD::DELETED_NODE &&
1760 RV.getOpcode() != ISD::DELETED_NODE &&
1761 "Node was deleted but visit returned new node!");
1762
1763 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1764
1765 if (N->getNumValues() == RV->getNumValues())
1766 DAG.ReplaceAllUsesWith(N, RV.getNode());
1767 else {
1768 assert(N->getValueType(0) == RV.getValueType() &&
1769 N->getNumValues() == 1 && "Type mismatch");
1770 DAG.ReplaceAllUsesWith(N, &RV);
1771 }
1772
1773 // Push the new node and any users onto the worklist. Omit this if the
1774 // new node is the EntryToken (e.g. if a store managed to get optimized
1775 // out), because re-visiting the EntryToken and its users will not uncover
1776 // any additional opportunities, but there may be a large number of such
1777 // users, potentially causing compile time explosion.
1778 if (RV.getOpcode() != ISD::EntryToken)
1779 AddToWorklistWithUsers(RV.getNode());
1780
1781 // Finally, if the node is now dead, remove it from the graph. The node
1782 // may not be dead if the replacement process recursively simplified to
1783 // something else needing this node. This will also take care of adding any
1784 // operands which have lost a user to the worklist.
1785 recursivelyDeleteUnusedNodes(N);
1786 }
1787
1788 // If the root changed (e.g. it was a dead load), update the root.
1789 DAG.setRoot(Dummy.getValue());
1790 DAG.RemoveDeadNodes();
1791}
1792
1793SDValue DAGCombiner::visit(SDNode *N) {
1794 // clang-format off
1795 switch (N->getOpcode()) {
1796 default: break;
1797 case ISD::TokenFactor: return visitTokenFactor(N);
1798 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1799 case ISD::ADD: return visitADD(N);
1800 case ISD::SUB: return visitSUB(N);
1801 case ISD::SADDSAT:
1802 case ISD::UADDSAT: return visitADDSAT(N);
1803 case ISD::SSUBSAT:
1804 case ISD::USUBSAT: return visitSUBSAT(N);
1805 case ISD::ADDC: return visitADDC(N);
1806 case ISD::SADDO:
1807 case ISD::UADDO: return visitADDO(N);
1808 case ISD::SUBC: return visitSUBC(N);
1809 case ISD::SSUBO:
1810 case ISD::USUBO: return visitSUBO(N);
1811 case ISD::ADDE: return visitADDE(N);
1812 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1813 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1814 case ISD::SUBE: return visitSUBE(N);
1815 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1816 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1817 case ISD::SMULFIX:
1818 case ISD::SMULFIXSAT:
1819 case ISD::UMULFIX:
1820 case ISD::UMULFIXSAT: return visitMULFIX(N);
1821 case ISD::MUL: return visitMUL(N);
1822 case ISD::SDIV: return visitSDIV(N);
1823 case ISD::UDIV: return visitUDIV(N);
1824 case ISD::SREM:
1825 case ISD::UREM: return visitREM(N);
1826 case ISD::MULHU: return visitMULHU(N);
1827 case ISD::MULHS: return visitMULHS(N);
1828 case ISD::AVGFLOORS:
1829 case ISD::AVGFLOORU:
1830 case ISD::AVGCEILS:
1831 case ISD::AVGCEILU: return visitAVG(N);
1832 case ISD::ABDS:
1833 case ISD::ABDU: return visitABD(N);
1834 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1835 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1836 case ISD::SMULO:
1837 case ISD::UMULO: return visitMULO(N);
1838 case ISD::SMIN:
1839 case ISD::SMAX:
1840 case ISD::UMIN:
1841 case ISD::UMAX: return visitIMINMAX(N);
1842 case ISD::AND: return visitAND(N);
1843 case ISD::OR: return visitOR(N);
1844 case ISD::XOR: return visitXOR(N);
1845 case ISD::SHL: return visitSHL(N);
1846 case ISD::SRA: return visitSRA(N);
1847 case ISD::SRL: return visitSRL(N);
1848 case ISD::ROTR:
1849 case ISD::ROTL: return visitRotate(N);
1850 case ISD::FSHL:
1851 case ISD::FSHR: return visitFunnelShift(N);
1852 case ISD::SSHLSAT:
1853 case ISD::USHLSAT: return visitSHLSAT(N);
1854 case ISD::ABS: return visitABS(N);
1855 case ISD::BSWAP: return visitBSWAP(N);
1856 case ISD::BITREVERSE: return visitBITREVERSE(N);
1857 case ISD::CTLZ: return visitCTLZ(N);
1858 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1859 case ISD::CTTZ: return visitCTTZ(N);
1860 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1861 case ISD::CTPOP: return visitCTPOP(N);
1862 case ISD::SELECT: return visitSELECT(N);
1863 case ISD::VSELECT: return visitVSELECT(N);
1864 case ISD::SELECT_CC: return visitSELECT_CC(N);
1865 case ISD::SETCC: return visitSETCC(N);
1866 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1867 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1868 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1869 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1870 case ISD::AssertSext:
1871 case ISD::AssertZext: return visitAssertExt(N);
1872 case ISD::AssertAlign: return visitAssertAlign(N);
1873 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1874 case ISD::SIGN_EXTEND_VECTOR_INREG:
1875 case ISD::ZERO_EXTEND_VECTOR_INREG:
1876 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1877 case ISD::TRUNCATE: return visitTRUNCATE(N);
1878 case ISD::BITCAST: return visitBITCAST(N);
1879 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1880 case ISD::FADD: return visitFADD(N);
1881 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1882 case ISD::FSUB: return visitFSUB(N);
1883 case ISD::FMUL: return visitFMUL(N);
1884 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1885 case ISD::FMAD: return visitFMAD(N);
1886 case ISD::FDIV: return visitFDIV(N);
1887 case ISD::FREM: return visitFREM(N);
1888 case ISD::FSQRT: return visitFSQRT(N);
1889 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1890 case ISD::FPOW: return visitFPOW(N);
1891 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1892 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1893 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1894 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1895 case ISD::LRINT:
1896 case ISD::LLRINT: return visitXRINT(N);
1897 case ISD::FP_ROUND: return visitFP_ROUND(N);
1898 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1899 case ISD::FNEG: return visitFNEG(N);
1900 case ISD::FABS: return visitFABS(N);
1901 case ISD::FFLOOR: return visitFFLOOR(N);
1902 case ISD::FMINNUM:
1903 case ISD::FMAXNUM:
1904 case ISD::FMINIMUM:
1905 case ISD::FMAXIMUM: return visitFMinMax(N);
1906 case ISD::FCEIL: return visitFCEIL(N);
1907 case ISD::FTRUNC: return visitFTRUNC(N);
1908 case ISD::FFREXP: return visitFFREXP(N);
1909 case ISD::BRCOND: return visitBRCOND(N);
1910 case ISD::BR_CC: return visitBR_CC(N);
1911 case ISD::LOAD: return visitLOAD(N);
1912 case ISD::STORE: return visitSTORE(N);
1913 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1914 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1915 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1916 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1917 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1918 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1919 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1920 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1921 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1922 case ISD::MGATHER: return visitMGATHER(N);
1923 case ISD::MLOAD: return visitMLOAD(N);
1924 case ISD::MSCATTER: return visitMSCATTER(N);
1925 case ISD::MSTORE: return visitMSTORE(N);
1926 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1927 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1928 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1929 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1930 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1931 case ISD::FREEZE: return visitFREEZE(N);
1932 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1933 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1934 case ISD::VECREDUCE_FADD:
1935 case ISD::VECREDUCE_FMUL:
1936 case ISD::VECREDUCE_ADD:
1937 case ISD::VECREDUCE_MUL:
1938 case ISD::VECREDUCE_AND:
1939 case ISD::VECREDUCE_OR:
1940 case ISD::VECREDUCE_XOR:
1941 case ISD::VECREDUCE_SMAX:
1942 case ISD::VECREDUCE_SMIN:
1943 case ISD::VECREDUCE_UMAX:
1944 case ISD::VECREDUCE_UMIN:
1945 case ISD::VECREDUCE_FMAX:
1946 case ISD::VECREDUCE_FMIN:
1947 case ISD::VECREDUCE_FMAXIMUM:
1948 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1949#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1950#include "llvm/IR/VPIntrinsics.def"
1951 return visitVPOp(N);
1952 }
1953 // clang-format on
1954 return SDValue();
1955}
1956
1957SDValue DAGCombiner::combine(SDNode *N) {
1958 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1959 return SDValue();
1960
1961 SDValue RV;
1962 if (!DisableGenericCombines)
1963 RV = visit(N);
1964
1965 // If nothing happened, try a target-specific DAG combine.
1966 if (!RV.getNode()) {
1967 assert(N->getOpcode() != ISD::DELETED_NODE &&
1968 "Node was deleted but visit returned NULL!");
1969
1970 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1971 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1972
1973 // Expose the DAG combiner to the target combiner impls.
1974 TargetLowering::DAGCombinerInfo
1975 DagCombineInfo(DAG, Level, false, this);
1976
1977 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1978 }
1979 }
1980
1981 // If nothing happened still, try promoting the operation.
1982 if (!RV.getNode()) {
1983 switch (N->getOpcode()) {
1984 default: break;
1985 case ISD::ADD:
1986 case ISD::SUB:
1987 case ISD::MUL:
1988 case ISD::AND:
1989 case ISD::OR:
1990 case ISD::XOR:
1991 RV = PromoteIntBinOp(SDValue(N, 0));
1992 break;
1993 case ISD::SHL:
1994 case ISD::SRA:
1995 case ISD::SRL:
1996 RV = PromoteIntShiftOp(SDValue(N, 0));
1997 break;
1998 case ISD::SIGN_EXTEND:
1999 case ISD::ZERO_EXTEND:
2000 case ISD::ANY_EXTEND:
2001 RV = PromoteExtend(SDValue(N, 0));
2002 break;
2003 case ISD::LOAD:
2004 if (PromoteLoad(SDValue(N, 0)))
2005 RV = SDValue(N, 0);
2006 break;
2007 }
2008 }
2009
2010 // If N is a commutative binary node, try to eliminate it if the commuted
2011 // version is already present in the DAG.
2012 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2013 SDValue N0 = N->getOperand(0);
2014 SDValue N1 = N->getOperand(1);
2015
2016 // Constant operands are canonicalized to RHS.
2017 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2018 SDValue Ops[] = {N1, N0};
2019 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2020 N->getFlags());
2021 if (CSENode)
2022 return SDValue(CSENode, 0);
2023 }
2024 }
2025
2026 return RV;
2027}
2028
2029/// Given a node, return its input chain if it has one, otherwise return a null
2030/// sd operand.
2031 static SDValue getInputChainForNode(SDNode *N) {
2032 if (unsigned NumOps = N->getNumOperands()) {
2033 if (N->getOperand(0).getValueType() == MVT::Other)
2034 return N->getOperand(0);
2035 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2036 return N->getOperand(NumOps-1);
2037 for (unsigned i = 1; i < NumOps-1; ++i)
2038 if (N->getOperand(i).getValueType() == MVT::Other)
2039 return N->getOperand(i);
2040 }
2041 return SDValue();
2042}
2043
2044SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2045 // If N has two operands, where one has an input chain equal to the other,
2046 // the 'other' chain is redundant.
2047 if (N->getNumOperands() == 2) {
2048 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2049 return N->getOperand(0);
2050 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2051 return N->getOperand(1);
2052 }
2053
2054 // Don't simplify token factors if optnone.
2055 if (OptLevel == CodeGenOptLevel::None)
2056 return SDValue();
2057
2058 // Don't simplify the token factor if the node itself has too many operands.
2059 if (N->getNumOperands() > TokenFactorInlineLimit)
2060 return SDValue();
2061
2062 // If the sole user is a token factor, we should make sure we have a
2063 // chance to merge them together. This prevents TF chains from inhibiting
2064 // optimizations.
2065 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2066 AddToWorklist(*(N->use_begin()));
2067
2068 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2069 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2070 SmallPtrSet<SDNode *, 16> SeenOps;
2071 bool Changed = false; // If we should replace this token factor.
2072
2073 // Start out with this token factor.
2074 TFs.push_back(N);
2075
2076 // Iterate through token factors. The TFs grows when new token factors are
2077 // encountered.
2078 for (unsigned i = 0; i < TFs.size(); ++i) {
2079 // Limit number of nodes to inline, to avoid quadratic compile times.
2080 // We have to add the outstanding Token Factors to Ops, otherwise we might
2081 // drop Ops from the resulting Token Factors.
2082 if (Ops.size() > TokenFactorInlineLimit) {
2083 for (unsigned j = i; j < TFs.size(); j++)
2084 Ops.emplace_back(TFs[j], 0);
2085 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2086 // combiner worklist later.
2087 TFs.resize(i);
2088 break;
2089 }
2090
2091 SDNode *TF = TFs[i];
2092 // Check each of the operands.
2093 for (const SDValue &Op : TF->op_values()) {
2094 switch (Op.getOpcode()) {
2095 case ISD::EntryToken:
2096 // Entry tokens don't need to be added to the list. They are
2097 // redundant.
2098 Changed = true;
2099 break;
2100
2101 case ISD::TokenFactor:
2102 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2103 // Queue up for processing.
2104 TFs.push_back(Op.getNode());
2105 Changed = true;
2106 break;
2107 }
2108 [[fallthrough]];
2109
2110 default:
2111 // Only add if it isn't already in the list.
2112 if (SeenOps.insert(Op.getNode()).second)
2113 Ops.push_back(Op);
2114 else
2115 Changed = true;
2116 break;
2117 }
2118 }
2119 }
2120
2121 // Re-visit inlined Token Factors, to clean them up in case they have been
2122 // removed. Skip the first Token Factor, as this is the current node.
2123 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2124 AddToWorklist(TFs[i]);
2125
2126 // Remove Nodes that are chained to another node in the list. Do so
2127 // by walking up chains breadth-first, stopping when we've seen
2128 // another operand. In general we must climb to the EntryNode, but we can exit
2129 // early if we find all remaining work is associated with just one operand as
2130 // no further pruning is possible.
2131
2132 // List of nodes to search through and original Ops from which they originate.
2133 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2134 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2135 SmallPtrSet<SDNode *, 16> SeenChains;
2136 bool DidPruneOps = false;
2137
2138 unsigned NumLeftToConsider = 0;
2139 for (const SDValue &Op : Ops) {
2140 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2141 OpWorkCount.push_back(1);
2142 }
2143
2144 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2145 // If this is an Op, we can remove the op from the list. Re-mark any
2146 // search associated with it as from the current OpNumber.
2147 if (SeenOps.contains(Op)) {
2148 Changed = true;
2149 DidPruneOps = true;
2150 unsigned OrigOpNumber = 0;
2151 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2152 OrigOpNumber++;
2153 assert((OrigOpNumber != Ops.size()) &&
2154 "expected to find TokenFactor Operand");
2155 // Re-mark worklist from OrigOpNumber to OpNumber
2156 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2157 if (Worklist[i].second == OrigOpNumber) {
2158 Worklist[i].second = OpNumber;
2159 }
2160 }
2161 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2162 OpWorkCount[OrigOpNumber] = 0;
2163 NumLeftToConsider--;
2164 }
2165 // Add if it's a new chain
2166 if (SeenChains.insert(Op).second) {
2167 OpWorkCount[OpNumber]++;
2168 Worklist.push_back(std::make_pair(Op, OpNumber));
2169 }
2170 };
2171
2172 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2173 // We need to consider at least 2 Ops to prune.
2174 if (NumLeftToConsider <= 1)
2175 break;
2176 auto CurNode = Worklist[i].first;
2177 auto CurOpNumber = Worklist[i].second;
2178 assert((OpWorkCount[CurOpNumber] > 0) &&
2179 "Node should not appear in worklist");
2180 switch (CurNode->getOpcode()) {
2181 case ISD::EntryToken:
2182 // Hitting EntryToken is the only way for the search to terminate without
2183 // hitting
2184 // another operand's search. Prevent us from marking this operand
2185 // considered.
2186 NumLeftToConsider++;
2187 break;
2188 case ISD::TokenFactor:
2189 for (const SDValue &Op : CurNode->op_values())
2190 AddToWorklist(i, Op.getNode(), CurOpNumber);
2191 break;
2192 case ISD::LIFETIME_START:
2193 case ISD::LIFETIME_END:
2194 case ISD::CopyFromReg:
2195 case ISD::CopyToReg:
2196 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2197 break;
2198 default:
2199 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2200 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2201 break;
2202 }
2203 OpWorkCount[CurOpNumber]--;
2204 if (OpWorkCount[CurOpNumber] == 0)
2205 NumLeftToConsider--;
2206 }
2207
2208 // If we've changed things around then replace token factor.
2209 if (Changed) {
2210 SDValue Result;
2211 if (Ops.empty()) {
2212 // The entry token is the only possible outcome.
2213 Result = DAG.getEntryNode();
2214 } else {
2215 if (DidPruneOps) {
2216 SmallVector<SDValue, 8> PrunedOps;
2217 //
2218 for (const SDValue &Op : Ops) {
2219 if (SeenChains.count(Op.getNode()) == 0)
2220 PrunedOps.push_back(Op);
2221 }
2222 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2223 } else {
2224 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2225 }
2226 }
2227 return Result;
2228 }
2229 return SDValue();
2230}
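// Editorial note (not part of the LLVM source): two small examples of the
// cleanup performed above:
//   TokenFactor (TokenFactor a, b), c     --> TokenFactor a, b, c
//     (single-use nested token factors are inlined)
//   TokenFactor x, (load ..., chain = x)  --> TokenFactor (load ...)
//     (x is pruned because it is already reached through the load's chain)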
2231
2232/// MERGE_VALUES can always be eliminated.
2233SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2234 WorklistRemover DeadNodes(*this);
2235 // Replacing results may cause a different MERGE_VALUES to suddenly
2236 // be CSE'd with N, and carry its uses with it. Iterate until no
2237 // uses remain, to ensure that the node can be safely deleted.
2238 // First add the users of this node to the work list so that they
2239 // can be tried again once they have new operands.
2240 AddUsersToWorklist(N);
2241 do {
2242 // Do as a single replacement to avoid rewalking use lists.
2243 SmallVector<SDValue, 8> Ops;
2244 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2245 Ops.push_back(N->getOperand(i));
2246 DAG.ReplaceAllUsesWith(N, Ops.data());
2247 } while (!N->use_empty());
2248 deleteAndRecombine(N);
2249 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2250}
2251
2252/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2253/// ConstantSDNode pointer else nullptr.
2254 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2255 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2256 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2257}
2258
2259// isTruncateOf - If N is a truncate of some other value, return true, record
2260// the value being truncated in Op and which of Op's bits are zero/one in Known.
2261// This function computes KnownBits to avoid a duplicated call to
2262// computeKnownBits in the caller.
2263 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2264 KnownBits &Known) {
2265 if (N->getOpcode() == ISD::TRUNCATE) {
2266 Op = N->getOperand(0);
2267 Known = DAG.computeKnownBits(Op);
2268 return true;
2269 }
2270
2271 if (N.getOpcode() != ISD::SETCC ||
2272 N.getValueType().getScalarType() != MVT::i1 ||
2273 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2274 return false;
2275
2276 SDValue Op0 = N->getOperand(0);
2277 SDValue Op1 = N->getOperand(1);
2278 assert(Op0.getValueType() == Op1.getValueType());
2279
2280 if (isNullOrNullSplat(Op0))
2281 Op = Op1;
2282 else if (isNullOrNullSplat(Op1))
2283 Op = Op0;
2284 else
2285 return false;
2286
2287 Known = DAG.computeKnownBits(Op);
2288
2289 return (Known.Zero | 1).isAllOnes();
2290}
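// Editorial note (not part of the LLVM source): besides an actual TRUNCATE,
// the helper above also treats (setcc x, 0, ne) as a "truncate to i1" when the
// known bits of x prove that only bit 0 can be set, since the setcc result
// then equals the low bit of x.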
2291
2292/// Return true if 'Use' is a load or a store that uses N as its base pointer
2293/// and that N may be folded in the load / store addressing mode.
2294 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2295 const TargetLowering &TLI) {
2296 EVT VT;
2297 unsigned AS;
2298
2299 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2300 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2301 return false;
2302 VT = LD->getMemoryVT();
2303 AS = LD->getAddressSpace();
2304 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2305 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2306 return false;
2307 VT = ST->getMemoryVT();
2308 AS = ST->getAddressSpace();
2309 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2310 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2311 return false;
2312 VT = LD->getMemoryVT();
2313 AS = LD->getAddressSpace();
2314 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2315 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2316 return false;
2317 VT = ST->getMemoryVT();
2318 AS = ST->getAddressSpace();
2319 } else {
2320 return false;
2321 }
2322
2323 TargetLowering::AddrMode AM;
2324 if (N->getOpcode() == ISD::ADD) {
2325 AM.HasBaseReg = true;
2326 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2327 if (Offset)
2328 // [reg +/- imm]
2329 AM.BaseOffs = Offset->getSExtValue();
2330 else
2331 // [reg +/- reg]
2332 AM.Scale = 1;
2333 } else if (N->getOpcode() == ISD::SUB) {
2334 AM.HasBaseReg = true;
2335 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2336 if (Offset)
2337 // [reg +/- imm]
2338 AM.BaseOffs = -Offset->getSExtValue();
2339 else
2340 // [reg +/- reg]
2341 AM.Scale = 1;
2342 } else {
2343 return false;
2344 }
2345
2346 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2347 VT.getTypeForEVT(*DAG.getContext()), AS);
2348}
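// Editorial note (not part of the LLVM source): for example, if N is
// (add %base, 16) and Use is a load with N as its base pointer, the query
// above asks the target whether a [reg + 16] addressing mode is legal for the
// load's memory VT and address space; if so, the add can be folded into the
// load's address computation.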
2349
2350/// This inverts a canonicalization in IR that replaces a variable select arm
2351/// with an identity constant. Codegen improves if we re-use the variable
2352/// operand rather than load a constant. This can also be converted into a
2353/// masked vector operation if the target supports it.
2354 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2355 bool ShouldCommuteOperands) {
2356 // Match a select as operand 1. The identity constant that we are looking for
2357 // is only valid as operand 1 of a non-commutative binop.
2358 SDValue N0 = N->getOperand(0);
2359 SDValue N1 = N->getOperand(1);
2360 if (ShouldCommuteOperands)
2361 std::swap(N0, N1);
2362
2363 // TODO: Should this apply to scalar select too?
2364 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2365 return SDValue();
2366
2367 // We can't hoist all instructions because of immediate UB (not speculatable).
2368 // For example div/rem by zero.
2369 if (!DAG.isSafeToSpeculativelyExecute(N->getOpcode()))
2370 return SDValue();
2371
2372 unsigned Opcode = N->getOpcode();
2373 EVT VT = N->getValueType(0);
2374 SDValue Cond = N1.getOperand(0);
2375 SDValue TVal = N1.getOperand(1);
2376 SDValue FVal = N1.getOperand(2);
2377
2378 // This transform increases uses of N0, so freeze it to be safe.
2379 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2380 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2381 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2382 SDValue F0 = DAG.getFreeze(N0);
2383 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2384 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2385 }
2386 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2387 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2388 SDValue F0 = DAG.getFreeze(N0);
2389 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2390 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2391 }
2392
2393 return SDValue();
2394}
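// Editorial note (not part of the LLVM source): a concrete instance of the
// fold above, using 0 as the additive identity:
//   add X, (vselect Cond, 0, FVal)
//     --> vselect Cond, freeze(X), (add freeze(X), FVal)
// On targets with predication this can later become a masked add, and in the
// lanes where Cond is true the original X is reused directly.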
2395
2396SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2397 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2398 "Unexpected binary operator");
2399
2400 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2401 auto BinOpcode = BO->getOpcode();
2402 EVT VT = BO->getValueType(0);
2403 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2404 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2405 return Sel;
2406
2407 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2408 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2409 return Sel;
2410 }
2411
2412 // Don't do this unless the old select is going away. We want to eliminate the
2413 // binary operator, not replace a binop with a select.
2414 // TODO: Handle ISD::SELECT_CC.
2415 unsigned SelOpNo = 0;
2416 SDValue Sel = BO->getOperand(0);
2417 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2418 SelOpNo = 1;
2419 Sel = BO->getOperand(1);
2420
2421 // Peek through trunc to shift amount type.
2422 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2423 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2424 // This is valid when the truncated bits of x are already zero.
2425 SDValue Op;
2426 KnownBits Known;
2427 if (isTruncateOf(DAG, Sel, Op, Known) &&
2428 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2429 Sel = Op;
2430 }
2431 }
2432
2433 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2434 return SDValue();
2435
2436 SDValue CT = Sel.getOperand(1);
2437 if (!isConstantOrConstantVector(CT, true) &&
2438 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2439 return SDValue();
2440
2441 SDValue CF = Sel.getOperand(2);
2442 if (!isConstantOrConstantVector(CF, true) &&
2443 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2444 return SDValue();
2445
2446 // Bail out if any constants are opaque because we can't constant fold those.
2447 // The exception is "and" and "or" with either 0 or -1 in which case we can
2448 // propagate non constant operands into select. I.e.:
2449 // and (select Cond, 0, -1), X --> select Cond, 0, X
2450 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2451 bool CanFoldNonConst =
2452 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2453 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2454 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2455
2456 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2457 if (!CanFoldNonConst &&
2458 !isConstantOrConstantVector(CBO, true) &&
2459 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2460 return SDValue();
2461
2462 SDLoc DL(Sel);
2463 SDValue NewCT, NewCF;
2464
2465 if (CanFoldNonConst) {
2466 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2467 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2468 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2469 NewCT = CT;
2470 else
2471 NewCT = CBO;
2472
2473 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2474 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2475 NewCF = CF;
2476 else
2477 NewCF = CBO;
2478 } else {
2479 // We have a select-of-constants followed by a binary operator with a
2480 // constant. Eliminate the binop by pulling the constant math into the
2481 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2482 // CBO, CF + CBO
2483 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2484 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2485 if (!NewCT)
2486 return SDValue();
2487
2488 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2489 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2490 if (!NewCF)
2491 return SDValue();
2492 }
2493
2494 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2495 SelectOp->setFlags(BO->getFlags());
2496 return SelectOp;
2497}
2498
2499 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2500 SelectionDAG &DAG) {
2501 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2502 "Expecting add or sub");
2503
2504 // Match a constant operand and a zext operand for the math instruction:
2505 // add Z, C
2506 // sub C, Z
2507 bool IsAdd = N->getOpcode() == ISD::ADD;
2508 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2509 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2510 auto *CN = dyn_cast<ConstantSDNode>(C);
2511 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2512 return SDValue();
2513
2514 // Match the zext operand as a setcc of a boolean.
2515 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2516 Z.getOperand(0).getValueType() != MVT::i1)
2517 return SDValue();
2518
2519 // Match the compare as: setcc (X & 1), 0, eq.
2520 SDValue SetCC = Z.getOperand(0);
2521 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2522 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2523 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2524 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2525 return SDValue();
2526
2527 // We are adding/subtracting a constant and an inverted low bit. Turn that
2528 // into a subtract/add of the low bit with incremented/decremented constant:
2529 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2530 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2531 EVT VT = C.getValueType();
2532 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2533 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2534 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2535 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2536}
2537
2538// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2539SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2540 SDValue N0 = N->getOperand(0);
2541 EVT VT = N0.getValueType();
2542 SDValue A, B;
2543
2544 if (hasOperation(ISD::AVGCEILU, VT) &&
2545 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2546 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2547 m_SpecificInt(1))))) {
2548 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2549 }
2550 if (hasOperation(ISD::AVGCEILS, VT) &&
2551 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2552 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2553 m_SpecificInt(1))))) {
2554 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2555 }
2556 return SDValue();
2557}
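// Editorial note (not part of the LLVM source): the identity being matched is
// (A | B) - ((A ^ B) >> 1) == ceil((A + B) / 2), computed without intermediate
// overflow. Quick check with A = 5, B = 2: (5 | 2) = 7, (5 ^ 2) = 7,
// 7 >> 1 = 3, 7 - 3 = 4 == ceil(3.5).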
2558
2559/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2560/// a shift and add with a different constant.
2561 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2562 SelectionDAG &DAG) {
2563 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2564 "Expecting add or sub");
2565
2566 // We need a constant operand for the add/sub, and the other operand is a
2567 // logical shift right: add (srl), C or sub C, (srl).
2568 bool IsAdd = N->getOpcode() == ISD::ADD;
2569 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2570 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2571 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2572 ShiftOp.getOpcode() != ISD::SRL)
2573 return SDValue();
2574
2575 // The shift must be of a 'not' value.
2576 SDValue Not = ShiftOp.getOperand(0);
2577 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2578 return SDValue();
2579
2580 // The shift must be moving the sign bit to the least-significant-bit.
2581 EVT VT = ShiftOp.getValueType();
2582 SDValue ShAmt = ShiftOp.getOperand(1);
2583 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2584 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2585 return SDValue();
2586
2587 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2588 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2589 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2590 if (SDValue NewC = DAG.FoldConstantArithmetic(
2591 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2592 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2593 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2594 Not.getOperand(0), ShAmt);
2595 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2596 }
2597
2598 return SDValue();
2599}
2600
2601static bool
2602 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2603 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2604 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2605}
2606
2607/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2608/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2609/// are no common bits set in the operands).
2610SDValue DAGCombiner::visitADDLike(SDNode *N) {
2611 SDValue N0 = N->getOperand(0);
2612 SDValue N1 = N->getOperand(1);
2613 EVT VT = N0.getValueType();
2614 SDLoc DL(N);
2615
2616 // fold (add x, undef) -> undef
2617 if (N0.isUndef())
2618 return N0;
2619 if (N1.isUndef())
2620 return N1;
2621
2622 // fold (add c1, c2) -> c1+c2
2623 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2624 return C;
2625
2626 // canonicalize constant to RHS
2627 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2628 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2629 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2630
2631 if (areBitwiseNotOfEachother(N0, N1))
2632 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2633 SDLoc(N), VT);
2634
2635 // fold vector ops
2636 if (VT.isVector()) {
2637 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2638 return FoldedVOp;
2639
2640 // fold (add x, 0) -> x, vector edition
2641 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2642 return N0;
2643 }
2644
2645 // fold (add x, 0) -> x
2646 if (isNullConstant(N1))
2647 return N0;
2648
2649 if (N0.getOpcode() == ISD::SUB) {
2650 SDValue N00 = N0.getOperand(0);
2651 SDValue N01 = N0.getOperand(1);
2652
2653 // fold ((A-c1)+c2) -> (A+(c2-c1))
2654 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2655 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2656
2657 // fold ((c1-A)+c2) -> (c1+c2)-A
2658 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2659 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2660 }
2661
2662 // add (sext i1 X), 1 -> zext (not i1 X)
2663 // We don't transform this pattern:
2664 // add (zext i1 X), -1 -> sext (not i1 X)
2665 // because most (?) targets generate better code for the zext form.
2666 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2667 isOneOrOneSplat(N1)) {
2668 SDValue X = N0.getOperand(0);
2669 if ((!LegalOperations ||
2670 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2671 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2672 X.getScalarValueSizeInBits() == 1) {
2673 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2674 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2675 }
2676 }
2677
2678 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2679 // iff (or x, c0) is equivalent to (add x, c0).
2680 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2681 // iff (xor x, c0) is equivalent to (add x, c0).
2682 if (DAG.isADDLike(N0)) {
2683 SDValue N01 = N0.getOperand(1);
2684 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2685 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2686 }
2687
2688 if (SDValue NewSel = foldBinOpIntoSelect(N))
2689 return NewSel;
2690
2691 // reassociate add
2692 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2693 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2694 return RADD;
2695
2696 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2697 // equivalent to (add x, c).
2698 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2699 // equivalent to (add x, c).
2700 // Do this optimization only when adding c does not introduce instructions
2701 // for adding carries.
2702 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2703 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2704 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2705 // If N0's type does not split or is a sign mask, it does not introduce
2706 // add carry.
2707 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2708 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2709 TyActn == TargetLoweringBase::TypePromoteInteger ||
2710 isMinSignedConstant(N0.getOperand(1));
2711 if (NoAddCarry)
2712 return DAG.getNode(
2713 ISD::ADD, DL, VT,
2714 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2715 N0.getOperand(1));
2716 }
2717 return SDValue();
2718 };
2719 if (SDValue Add = ReassociateAddOr(N0, N1))
2720 return Add;
2721 if (SDValue Add = ReassociateAddOr(N1, N0))
2722 return Add;
2723
2724 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2725 if (SDValue SD =
2726 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2727 return SD;
2728 }
2729
2730 SDValue A, B, C;
2731
2732 // fold ((0-A) + B) -> B-A
2733 if (sd_match(N0, m_Neg(m_Value(A))))
2734 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2735
2736 // fold (A + (0-B)) -> A-B
2737 if (sd_match(N1, m_Neg(m_Value(B))))
2738 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2739
2740 // fold (A+(B-A)) -> B
2741 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2742 return B;
2743
2744 // fold ((B-A)+A) -> B
2745 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2746 return B;
2747
2748 // fold ((A-B)+(C-A)) -> (C-B)
2749 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2750 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2751 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2752
2753 // fold ((A-B)+(B-C)) -> (A-C)
2754 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2755 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2756 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2757
2758 // fold (A+(B-(A+C))) to (B-C)
2759 // fold (A+(B-(C+A))) to (B-C)
2760 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2761 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2762
2763 // fold (A+((B-A)+or-C)) to (B+or-C)
2764 if (sd_match(N1,
2765 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2766 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2767 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2768
2769 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2770 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2771 N0->hasOneUse() && N1->hasOneUse()) {
2772 SDValue N00 = N0.getOperand(0);
2773 SDValue N01 = N0.getOperand(1);
2774 SDValue N10 = N1.getOperand(0);
2775 SDValue N11 = N1.getOperand(1);
2776
2777 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2778 return DAG.getNode(ISD::SUB, DL, VT,
2779 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2780 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2781 }
2782
2783 // fold (add (umax X, C), -C) --> (usubsat X, C)
2784 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2785 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2786 return (!Max && !Op) ||
2787 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2788 };
2789 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2790 /*AllowUndefs*/ true))
2791 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2792 N0.getOperand(1));
2793 }
2794
2795 if (SimplifyDemandedBits(SDValue(N, 0)))
2796 return SDValue(N, 0);
2797
2798 if (isOneOrOneSplat(N1)) {
2799 // fold (add (xor a, -1), 1) -> (sub 0, a)
2800 if (isBitwiseNot(N0))
2801 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2802 N0.getOperand(0));
2803
2804 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2805 if (N0.getOpcode() == ISD::ADD) {
2806 SDValue A, Xor;
2807
2808 if (isBitwiseNot(N0.getOperand(0))) {
2809 A = N0.getOperand(1);
2810 Xor = N0.getOperand(0);
2811 } else if (isBitwiseNot(N0.getOperand(1))) {
2812 A = N0.getOperand(0);
2813 Xor = N0.getOperand(1);
2814 }
2815
2816 if (Xor)
2817 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2818 }
2819
2820 // Look for:
2821 // add (add x, y), 1
2822 // And if the target does not like this form then turn into:
2823 // sub y, (xor x, -1)
2824 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2825 N0.hasOneUse() &&
2826 // Limit this to after legalization if the add has wrap flags
2827 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2828 !N->getFlags().hasNoSignedWrap()))) {
2829 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2830 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2831 }
2832 }
2833
2834 // (x - y) + -1 -> add (xor y, -1), x
2835 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2836 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2837 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2838 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2839 }
2840
2841 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2842 return Combined;
2843
2844 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2845 return Combined;
2846
2847 return SDValue();
2848}
2849
2850// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2851SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2852 SDValue N0 = N->getOperand(0);
2853 EVT VT = N0.getValueType();
2854 SDValue A, B;
2855
2856 if (hasOperation(ISD::AVGFLOORU, VT) &&
2857 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2858 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2859 m_SpecificInt(1))))) {
2860 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2861 }
2862 if (hasOperation(ISD::AVGFLOORS, VT) &&
2863 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2864 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2865 m_SpecificInt(1))))) {
2866 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2867 }
2868
2869 return SDValue();
2870}
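// Editorial note (not part of the LLVM source): the matched identity is
// (A & B) + ((A ^ B) >> 1) == floor((A + B) / 2), computed without
// intermediate overflow. Quick check with A = 5, B = 2: (5 & 2) = 0,
// (5 ^ 2) = 7, 7 >> 1 = 3, 0 + 3 = 3 == floor(3.5).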
2871
2872SDValue DAGCombiner::visitADD(SDNode *N) {
2873 SDValue N0 = N->getOperand(0);
2874 SDValue N1 = N->getOperand(1);
2875 EVT VT = N0.getValueType();
2876 SDLoc DL(N);
2877
2878 if (SDValue Combined = visitADDLike(N))
2879 return Combined;
2880
2881 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2882 return V;
2883
2884 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2885 return V;
2886
2887 // Try to match AVGFLOOR fixedwidth pattern
2888 if (SDValue V = foldAddToAvg(N, DL))
2889 return V;
2890
2891 // fold (a+b) -> (a|b) iff a and b share no bits.
2892 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2893 DAG.haveNoCommonBitsSet(N0, N1)) {
2894 SDNodeFlags Flags;
2895 Flags.setDisjoint(true);
2896 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2897 }
2898
2899 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2900 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2901 const APInt &C0 = N0->getConstantOperandAPInt(0);
2902 const APInt &C1 = N1->getConstantOperandAPInt(0);
2903 return DAG.getVScale(DL, VT, C0 + C1);
2904 }
2905
2906 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2907 if (N0.getOpcode() == ISD::ADD &&
2908 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2909 N1.getOpcode() == ISD::VSCALE) {
2910 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2911 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2912 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2913 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2914 }
2915
2916 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2917 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2918 N1.getOpcode() == ISD::STEP_VECTOR) {
2919 const APInt &C0 = N0->getConstantOperandAPInt(0);
2920 const APInt &C1 = N1->getConstantOperandAPInt(0);
2921 APInt NewStep = C0 + C1;
2922 return DAG.getStepVector(DL, VT, NewStep);
2923 }
2924
2925 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2926 if (N0.getOpcode() == ISD::ADD &&
2927 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2928 N1.getOpcode() == ISD::STEP_VECTOR) {
2929 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2930 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2931 APInt NewStep = SV0 + SV1;
2932 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2933 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2934 }
2935
2936 return SDValue();
2937}
2938
2939SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2940 unsigned Opcode = N->getOpcode();
2941 SDValue N0 = N->getOperand(0);
2942 SDValue N1 = N->getOperand(1);
2943 EVT VT = N0.getValueType();
2944 bool IsSigned = Opcode == ISD::SADDSAT;
2945 SDLoc DL(N);
2946
2947 // fold (add_sat x, undef) -> -1
2948 if (N0.isUndef() || N1.isUndef())
2949 return DAG.getAllOnesConstant(DL, VT);
2950
2951 // fold (add_sat c1, c2) -> c3
2952 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2953 return C;
2954
2955 // canonicalize constant to RHS
2956 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2957 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2958 return DAG.getNode(Opcode, DL, VT, N1, N0);
2959
2960 // fold vector ops
2961 if (VT.isVector()) {
2962 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2963 return FoldedVOp;
2964
2965 // fold (add_sat x, 0) -> x, vector edition
2966 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2967 return N0;
2968 }
2969
2970 // fold (add_sat x, 0) -> x
2971 if (isNullConstant(N1))
2972 return N0;
2973
2974 // If it cannot overflow, transform into an add.
2975 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
2976 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2977
2978 return SDValue();
2979}
2980
2981 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
2982 bool ForceCarryReconstruction = false) {
2983 bool Masked = false;
2984
2985 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2986 while (true) {
2987 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2988 V = V.getOperand(0);
2989 continue;
2990 }
2991
2992 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2993 if (ForceCarryReconstruction)
2994 return V;
2995
2996 Masked = true;
2997 V = V.getOperand(0);
2998 continue;
2999 }
3000
3001 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3002 return V;
3003
3004 break;
3005 }
3006
3007 // If this is not a carry, return.
3008 if (V.getResNo() != 1)
3009 return SDValue();
3010
3011 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3012 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3013 return SDValue();
3014
3015 EVT VT = V->getValueType(0);
3016 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3017 return SDValue();
3018
3019 // If the result is masked, then no matter what kind of bool it is we can
3020 // return. If it isn't, then we need to make sure the bool type is either 0 or
3021 // 1 and not other values.
3022 if (Masked ||
3023 TLI.getBooleanContents(V.getValueType()) ==
3024 TargetLowering::ZeroOrOneBooleanContent)
3025 return V;
3026
3027 return SDValue();
3028}
3029
3030/// Given the operands of an add/sub operation, see if the 2nd operand is a
3031/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3032/// the opcode and bypass the mask operation.
3033static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3034 SelectionDAG &DAG, const SDLoc &DL) {
3035 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3036 N1 = N1.getOperand(0);
3037
3038 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3039 return SDValue();
3040
3041 EVT VT = N0.getValueType();
3042 SDValue N10 = N1.getOperand(0);
3043 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3044 N10 = N10.getOperand(0);
3045
3046 if (N10.getValueType() != VT)
3047 return SDValue();
3048
3049 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3050 return SDValue();
3051
3052 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3053 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3054 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3055}
3056
3057/// Helper for doing combines based on N0 and N1 being added to each other.
3058SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3059 SDNode *LocReference) {
3060 EVT VT = N0.getValueType();
3061 SDLoc DL(LocReference);
3062
3063 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3064 SDValue Y, N;
3065 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3066 return DAG.getNode(ISD::SUB, DL, VT, N0,
3067 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3068
3069 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3070 return V;
3071
3072 // Look for:
3073 // add (add x, 1), y
3074 // And if the target does not like this form then turn into:
3075 // sub y, (xor x, -1)
3076 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3077 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3078 // Limit this to after legalization if the add has wrap flags
3079 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3080 !N0->getFlags().hasNoSignedWrap()))) {
3081 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3082 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3083 }
3084
3085 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3086 // Hoist one-use subtraction by non-opaque constant:
3087 // (x - C) + y -> (x + y) - C
3088 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3089 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3090 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3091 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3092 }
3093 // Hoist one-use subtraction from non-opaque constant:
3094 // (C - x) + y -> (y - x) + C
3095 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3096 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3097 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3098 }
3099 }
3100
3101 // add (mul x, C), x -> mul x, C+1
3102 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3103 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3104 N0.hasOneUse()) {
3105 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3106 DAG.getConstant(1, DL, VT));
3107 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3108 }
3109
3110 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3111 // rather than 'add 0/-1' (the zext should get folded).
3112 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3113 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3114 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3115 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3116 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3117 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3118 }
3119
3120 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3121 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3122 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3123 if (TN->getVT() == MVT::i1) {
3124 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3125 DAG.getConstant(1, DL, VT));
3126 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3127 }
3128 }
3129
3130 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3131 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3132 N1.getResNo() == 0)
3133 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3134 N0, N1.getOperand(0), N1.getOperand(2));
3135
3136 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3137 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3138 if (SDValue Carry = getAsCarry(TLI, N1))
3139 return DAG.getNode(ISD::UADDO_CARRY, DL,
3140 DAG.getVTList(VT, Carry.getValueType()), N0,
3141 DAG.getConstant(0, DL, VT), Carry);
3142
3143 return SDValue();
3144}
3145
3146SDValue DAGCombiner::visitADDC(SDNode *N) {
3147 SDValue N0 = N->getOperand(0);
3148 SDValue N1 = N->getOperand(1);
3149 EVT VT = N0.getValueType();
3150 SDLoc DL(N);
3151
3152 // If the flag result is dead, turn this into an ADD.
3153 if (!N->hasAnyUseOfValue(1))
3154 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3155 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3156
3157 // canonicalize constant to RHS.
3158 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3159 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3160 if (N0C && !N1C)
3161 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3162
3163 // fold (addc x, 0) -> x + no carry out
3164 if (isNullConstant(N1))
3165 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3166 DL, MVT::Glue));
3167
3168 // If it cannot overflow, transform into an add.
3169 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3170 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3171 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3172
3173 return SDValue();
3174}
3175
3176/**
3177 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3178 * then the flip also occurs if computing the inverse is the same cost.
3179 * This function returns an empty SDValue in case it cannot flip the boolean
3180 * without increasing the cost of the computation. If you want to flip a boolean
3181 * no matter what, use DAG.getLogicalNOT.
3182 */
3183 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3184 const TargetLowering &TLI,
3185 bool Force) {
3186 if (Force && isa<ConstantSDNode>(V))
3187 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3188
3189 if (V.getOpcode() != ISD::XOR)
3190 return SDValue();
3191
3192 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3193 if (!Const)
3194 return SDValue();
3195
3196 EVT VT = V.getValueType();
3197
3198 bool IsFlip = false;
3199 switch(TLI.getBooleanContents(VT)) {
3200 case TargetLowering::ZeroOrOneBooleanContent:
3201 IsFlip = Const->isOne();
3202 break;
3203 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3204 IsFlip = Const->isAllOnes();
3205 break;
3206 case TargetLowering::UndefinedBooleanContent:
3207 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3208 break;
3209 }
3210
3211 if (IsFlip)
3212 return V.getOperand(0);
3213 if (Force)
3214 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3215 return SDValue();
3216}
3217
3218SDValue DAGCombiner::visitADDO(SDNode *N) {
3219 SDValue N0 = N->getOperand(0);
3220 SDValue N1 = N->getOperand(1);
3221 EVT VT = N0.getValueType();
3222 bool IsSigned = (ISD::SADDO == N->getOpcode());
3223
3224 EVT CarryVT = N->getValueType(1);
3225 SDLoc DL(N);
3226
3227 // If the flag result is dead, turn this into an ADD.
3228 if (!N->hasAnyUseOfValue(1))
3229 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3230 DAG.getUNDEF(CarryVT));
3231
3232 // canonicalize constant to RHS.
3233 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3234 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3235 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3236
3237 // fold (addo x, 0) -> x + no carry out
3238 if (isNullOrNullSplat(N1))
3239 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3240
3241 // If it cannot overflow, transform into an add.
3242 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3243 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3244 DAG.getConstant(0, DL, CarryVT));
3245
3246 if (IsSigned) {
3247 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3248 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3249 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3250 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3251 } else {
3252 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
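// (xor a, -1) + 1 == -a == 0 - a. The add overflows only for a == 0, which is
// exactly when the sub does not borrow, hence the flipped carry.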
3253 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3254 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3255 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3256 return CombineTo(
3257 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3258 }
3259
3260 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3261 return Combined;
3262
3263 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3264 return Combined;
3265 }
3266
3267 return SDValue();
3268}
3269
3270SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3271 EVT VT = N0.getValueType();
3272 if (VT.isVector())
3273 return SDValue();
3274
3275 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3276 // If Y + 1 cannot overflow.
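// Since Carry is at most 1, Y + Carry cannot wrap either, so the inner node's
// carry-out is known to be zero and its carry-in can be folded into this add.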
3277 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3278 SDValue Y = N1.getOperand(0);
3279 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3280 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3281 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3282 N1.getOperand(2));
3283 }
3284
3285 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3286 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3287 if (SDValue Carry = getAsCarry(TLI, N1))
3288 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3289 DAG.getConstant(0, SDLoc(N), VT), Carry);
3290
3291 return SDValue();
3292}
3293
3294SDValue DAGCombiner::visitADDE(SDNode *N) {
3295 SDValue N0 = N->getOperand(0);
3296 SDValue N1 = N->getOperand(1);
3297 SDValue CarryIn = N->getOperand(2);
3298
3299 // canonicalize constant to RHS
3300 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3301 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3302 if (N0C && !N1C)
3303 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3304 N1, N0, CarryIn);
3305
3306 // fold (adde x, y, false) -> (addc x, y)
3307 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3308 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3309
3310 return SDValue();
3311}
3312
3313SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3314 SDValue N0 = N->getOperand(0);
3315 SDValue N1 = N->getOperand(1);
3316 SDValue CarryIn = N->getOperand(2);
3317 SDLoc DL(N);
3318
3319 // canonicalize constant to RHS
3320 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3321 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3322 if (N0C && !N1C)
3323 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3324
3325 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3326 if (isNullConstant(CarryIn)) {
3327 if (!LegalOperations ||
3328 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3329 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3330 }
3331
3332 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3333 if (isNullConstant(N0) && isNullConstant(N1)) {
3334 EVT VT = N0.getValueType();
3335 EVT CarryVT = CarryIn.getValueType();
3336 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3337 AddToWorklist(CarryExt.getNode());
3338 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3339 DAG.getConstant(1, DL, VT)),
3340 DAG.getConstant(0, DL, CarryVT));
3341 }
3342
3343 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3344 return Combined;
3345
3346 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3347 return Combined;
3348
3349 // We want to avoid useless duplication.
3350 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3351 // not a binary operation, it is not possible to leverage that existing
3352 // mechanism for it. However, if more operations require the same
3353 // deduplication logic, then it may be worth generalizing it.
3354 SDValue Ops[] = {N1, N0, CarryIn};
3355 SDNode *CSENode =
3356 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3357 if (CSENode)
3358 return SDValue(CSENode, 0);
3359
3360 return SDValue();
3361}
3362
3363/**
3364 * If we are facing some sort of diamond carry propagation pattern try to
3365 * break it up to generate something like:
3366 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3367 *
3368 * The end result is usually an increase in the operations required, but because the
3369 * carry is now linearized, other transforms can kick in and optimize the DAG.
3370 *
3371 * Patterns typically look something like
3372 * (uaddo A, B)
3373 * / \
3374 * Carry Sum
3375 * | \
3376 * | (uaddo_carry *, 0, Z)
3377 * | /
3378 * \ Carry
3379 * | /
3380 * (uaddo_carry X, *, *)
3381 *
3382 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3383 * produce a combine with a single path for carry propagation.
3384 */
3385 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3386 SelectionDAG &DAG, SDValue X,
3387 SDValue Carry0, SDValue Carry1,
3388 SDNode *N) {
3389 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3390 return SDValue();
3391 if (Carry1.getOpcode() != ISD::UADDO)
3392 return SDValue();
3393
3394 SDValue Z;
3395
3396 /**
3397 * First look for a suitable Z. It will present itself in the form of
3398 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3399 */
3400 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3401 isNullConstant(Carry0.getOperand(1))) {
3402 Z = Carry0.getOperand(2);
3403 } else if (Carry0.getOpcode() == ISD::UADDO &&
3404 isOneConstant(Carry0.getOperand(1))) {
3405 EVT VT = Carry0->getValueType(1);
3406 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3407 } else {
3408 // We couldn't find a suitable Z.
3409 return SDValue();
3410 }
3411
3412
3413 auto cancelDiamond = [&](SDValue A,SDValue B) {
3414 SDLoc DL(N);
3415 SDValue NewY =
3416 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3417 Combiner.AddToWorklist(NewY.getNode());
3418 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3419 DAG.getConstant(0, DL, X.getValueType()),
3420 NewY.getValue(1));
3421 };
3422
3423 /**
3424 * (uaddo A, B)
3425 * |
3426 * Sum
3427 * |
3428 * (uaddo_carry *, 0, Z)
3429 */
3430 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3431 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3432 }
3433
3434 /**
3435 * (uaddo_carry A, 0, Z)
3436 * |
3437 * Sum
3438 * |
3439 * (uaddo *, B)
3440 */
3441 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3442 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3443 }
3444
3445 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3446 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3447 }
3448
3449 return SDValue();
3450}
3451
3452// If we are facing some sort of diamond carry/borrow in/out pattern try to
3453// match patterns like:
3454//
3455// (uaddo A, B) CarryIn
3456// | \ |
3457// | \ |
3458// PartialSum PartialCarryOutX /
3459// | | /
3460// | ____|____________/
3461// | / |
3462// (uaddo *, *) \________
3463// | \ \
3464// | \ |
3465// | PartialCarryOutY |
3466// | \ |
3467// | \ /
3468// AddCarrySum | ______/
3469// | /
3470// CarryOut = (or *, *)
3471//
3472// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3473//
3474// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3475//
3476// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3477 // with a single path for carry/borrow out propagation.
3478 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3479 SDValue N0, SDValue N1, SDNode *N) {
3480 SDValue Carry0 = getAsCarry(TLI, N0);
3481 if (!Carry0)
3482 return SDValue();
3483 SDValue Carry1 = getAsCarry(TLI, N1);
3484 if (!Carry1)
3485 return SDValue();
3486
3487 unsigned Opcode = Carry0.getOpcode();
3488 if (Opcode != Carry1.getOpcode())
3489 return SDValue();
3490 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3491 return SDValue();
3492 // Guarantee identical type of CarryOut
3493 EVT CarryOutType = N->getValueType(0);
3494 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3495 CarryOutType != Carry1.getValue(1).getValueType())
3496 return SDValue();
3497
3498 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3499 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3500 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3501 std::swap(Carry0, Carry1);
3502
3503 // Check if nodes are connected in expected way.
3504 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3505 Carry1.getOperand(1) != Carry0.getValue(0))
3506 return SDValue();
3507
3508 // The carry in value must be on the righthand side for subtraction.
3509 unsigned CarryInOperandNum =
3510 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3511 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3512 return SDValue();
3513 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3514
3515 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3516 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3517 return SDValue();
3518
3519 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3520 CarryIn = getAsCarry(TLI, CarryIn, true);
3521 if (!CarryIn)
3522 return SDValue();
3523
3524 SDLoc DL(N);
3525 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3526 Carry1->getValueType(0));
3527 SDValue Merged =
3528 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3529 Carry0.getOperand(1), CarryIn);
3530
3531 // Please note that because we have proven that the result of the UADDO/USUBO
3532 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3533 // therefore prove that if the first UADDO/USUBO overflows, the second
3534 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3535 // maximum value.
3536 //
3537 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3538 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3539 //
3540 // This is important because it means that OR and XOR can be used to merge
3541 // carry flags; and that AND can return a constant zero.
3542 //
3543 // TODO: match other operations that can merge flags (ADD, etc)
3544 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3545 if (N->getOpcode() == ISD::AND)
3546 return DAG.getConstant(0, DL, CarryOutType);
3547 return Merged.getValue(1);
3548}
3549
3550SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3551 SDValue CarryIn, SDNode *N) {
3552 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3553 // carry.
3554 if (isBitwiseNot(N0))
3555 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3556 SDLoc DL(N);
3557 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3558 N0.getOperand(0), NotC);
3559 return CombineTo(
3560 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3561 }
3562
3563 // Iff the flag result is dead:
3564 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3565 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3566 // or the dependency between the instructions.
3567 if ((N0.getOpcode() == ISD::ADD ||
3568 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3569 N0.getValue(1) != CarryIn)) &&
3570 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3571 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3572 N0.getOperand(0), N0.getOperand(1), CarryIn);
3573
3574 /**
3575 * When one of the uaddo_carry argument is itself a carry, we may be facing
3576 * a diamond carry propagation. In which case we try to transform the DAG
3577 * to ensure linear carry propagation if that is possible.
3578 */
3579 if (auto Y = getAsCarry(TLI, N1)) {
3580 // Because both are carries, Y and Z can be swapped.
3581 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3582 return R;
3583 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3584 return R;
3585 }
3586
3587 return SDValue();
3588}
3589
3590SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3591 SDValue CarryIn, SDNode *N) {
3592 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3593 if (isBitwiseNot(N0)) {
3594 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3595 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3596 N0.getOperand(0), NotC);
3597 }
3598
3599 return SDValue();
3600}
3601
3602SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3603 SDValue N0 = N->getOperand(0);
3604 SDValue N1 = N->getOperand(1);
3605 SDValue CarryIn = N->getOperand(2);
3606 SDLoc DL(N);
3607
3608 // canonicalize constant to RHS
3609 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3610 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3611 if (N0C && !N1C)
3612 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3613
3614 // fold (saddo_carry x, y, false) -> (saddo x, y)
3615 if (isNullConstant(CarryIn)) {
3616 if (!LegalOperations ||
3617 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3618 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3619 }
3620
3621 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3622 return Combined;
3623
3624 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3625 return Combined;
3626
3627 return SDValue();
3628}
3629
3630// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3631// clamp/truncation if necessary.
3632static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3633 SDValue RHS, SelectionDAG &DAG,
3634 const SDLoc &DL) {
3635 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3636 "Illegal truncation");
3637
3638 if (DstVT == SrcVT)
3639 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3640
3641 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3642 // clamping RHS.
3643 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3644 DstVT.getScalarSizeInBits());
3645 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3646 return SDValue();
3647
3648 SDValue SatLimit =
3649 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3650 DstVT.getScalarSizeInBits()),
3651 DL, SrcVT);
3652 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3653 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3654 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3655 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3656}
3657
3658// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3659// usubsat(a,b), optionally as a truncated type.
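// e.g. umax(a,b) - b == (a >= b ? a - b : 0) == usubsat(a,b), and likewise
// a - umin(a,b) == usubsat(a,b).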
3660SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3661 if (N->getOpcode() != ISD::SUB ||
3662 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3663 return SDValue();
3664
3665 EVT SubVT = N->getValueType(0);
3666 SDValue Op0 = N->getOperand(0);
3667 SDValue Op1 = N->getOperand(1);
3668
3669 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3670 // that may be converted to usubsat(a,b).
3671 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3672 SDValue MaxLHS = Op0.getOperand(0);
3673 SDValue MaxRHS = Op0.getOperand(1);
3674 if (MaxLHS == Op1)
3675 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3676 if (MaxRHS == Op1)
3677 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3678 }
3679
3680 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3681 SDValue MinLHS = Op1.getOperand(0);
3682 SDValue MinRHS = Op1.getOperand(1);
3683 if (MinLHS == Op0)
3684 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3685 if (MinRHS == Op0)
3686 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3687 }
3688
3689 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3690 if (Op1.getOpcode() == ISD::TRUNCATE &&
3691 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3692 Op1.getOperand(0).hasOneUse()) {
3693 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3694 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3695 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3696 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3697 DAG, DL);
3698 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3699 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3700 DAG, DL);
3701 }
3702
3703 return SDValue();
3704}
3705
3706 // Since it may not be valid to emit a fold to zero for vector initializers,
3707// check if we can before folding.
3708static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3709 SelectionDAG &DAG, bool LegalOperations) {
3710 if (!VT.isVector())
3711 return DAG.getConstant(0, DL, VT);
3712 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3713 return DAG.getConstant(0, DL, VT);
3714 return SDValue();
3715}
3716
3717SDValue DAGCombiner::visitSUB(SDNode *N) {
3718 SDValue N0 = N->getOperand(0);
3719 SDValue N1 = N->getOperand(1);
3720 EVT VT = N0.getValueType();
3721 unsigned BitWidth = VT.getScalarSizeInBits();
3722 SDLoc DL(N);
3723
3724 auto PeekThroughFreeze = [](SDValue N) {
3725 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3726 return N->getOperand(0);
3727 return N;
3728 };
3729
3730 // fold (sub x, x) -> 0
3731 // FIXME: Refactor this and xor and other similar operations together.
3732 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3733 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3734
3735 // fold (sub c1, c2) -> c3
3736 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3737 return C;
3738
3739 // fold vector ops
3740 if (VT.isVector()) {
3741 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3742 return FoldedVOp;
3743
3744 // fold (sub x, 0) -> x, vector edition
3745 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3746 return N0;
3747 }
3748
3749 if (SDValue NewSel = foldBinOpIntoSelect(N))
3750 return NewSel;
3751
3752 // fold (sub x, c) -> (add x, -c)
3753 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3754 return DAG.getNode(ISD::ADD, DL, VT, N0,
3755 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3756
3757 if (isNullOrNullSplat(N0)) {
3758 // Right-shifting everything out but the sign bit followed by negation is
3759 // the same as flipping arithmetic/logical shift type without the negation:
3760 // -(X >>u 31) -> (X >>s 31)
3761 // -(X >>s 31) -> (X >>u 31)
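// (X >>u BW-1) is 0 or 1 and (X >>s BW-1) is 0 or -1, so negating one form
// yields the other.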
3762 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3763 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3764 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3765 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3766 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3767 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3768 }
3769 }
3770
3771 // 0 - X --> 0 if the sub is NUW.
3772 if (N->getFlags().hasNoUnsignedWrap())
3773 return N0;
3774
3775 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3776 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3777 // N1 must be 0 because negating the minimum signed value is undefined.
3778 if (N->getFlags().hasNoSignedWrap())
3779 return N0;
3780
3781 // 0 - X --> X if X is 0 or the minimum signed value.
3782 return N1;
3783 }
3784
3785 // Convert 0 - abs(x).
3786 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3787 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3788 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3789 return Result;
3790
3791 // Fold neg(splat(neg(x)) -> splat(x)
3792 if (VT.isVector()) {
3793 SDValue N1S = DAG.getSplatValue(N1, true);
3794 if (N1S && N1S.getOpcode() == ISD::SUB &&
3795 isNullConstant(N1S.getOperand(0)))
3796 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3797 }
3798 }
3799
3800 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3801 if (isAllOnesOrAllOnesSplat(N0))
3802 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3803
3804 // fold (A - (0-B)) -> A+B
3805 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3806 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3807
3808 // fold A-(A-B) -> B
3809 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3810 return N1.getOperand(1);
3811
3812 // fold (A+B)-A -> B
3813 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3814 return N0.getOperand(1);
3815
3816 // fold (A+B)-B -> A
3817 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3818 return N0.getOperand(0);
3819
3820 // fold (A+C1)-C2 -> A+(C1-C2)
3821 if (N0.getOpcode() == ISD::ADD) {
3822 SDValue N01 = N0.getOperand(1);
3823 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3824 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3825 }
3826
3827 // fold C2-(A+C1) -> (C2-C1)-A
3828 if (N1.getOpcode() == ISD::ADD) {
3829 SDValue N11 = N1.getOperand(1);
3830 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3831 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3832 }
3833
3834 // fold (A-C1)-C2 -> A-(C1+C2)
3835 if (N0.getOpcode() == ISD::SUB) {
3836 SDValue N01 = N0.getOperand(1);
3837 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3838 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3839 }
3840
3841 // fold (c1-A)-c2 -> (c1-c2)-A
3842 if (N0.getOpcode() == ISD::SUB) {
3843 SDValue N00 = N0.getOperand(0);
3844 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3845 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3846 }
3847
3848 SDValue A, B, C;
3849
3850 // fold ((A+(B+C))-B) -> A+C
3851 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3852 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3853
3854 // fold ((A+(B-C))-B) -> A-C
3855 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3856 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3857
3858 // fold ((A-(B-C))-C) -> A-B
3859 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3860 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3861
3862 // fold (A-(B-C)) -> A+(C-B)
3863 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3864 return DAG.getNode(ISD::ADD, DL, VT, N0,
3865 DAG.getNode(ISD::SUB, DL, VT, C, B));
3866
3867 // A - (A & B) -> A & (~B)
3868 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3869 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3870 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3871
3872 // fold (A - (-B * C)) -> (A + (B * C))
3873 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3874 return DAG.getNode(ISD::ADD, DL, VT, N0,
3875 DAG.getNode(ISD::MUL, DL, VT, B, C));
3876
3877 // If either operand of a sub is undef, the result is undef
3878 if (N0.isUndef())
3879 return N0;
3880 if (N1.isUndef())
3881 return N1;
3882
3883 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3884 return V;
3885
3886 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3887 return V;
3888
3889 // Try to match AVGCEIL fixedwidth pattern
3890 if (SDValue V = foldSubToAvg(N, DL))
3891 return V;
3892
3893 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3894 return V;
3895
3896 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3897 return V;
3898
3899 // (A - B) - 1 -> add (xor B, -1), A
3900 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3901 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3902
3903 // Look for:
3904 // sub y, (xor x, -1)
3905 // And if the target does not like this form then turn into:
3906 // add (add x, y), 1
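// Again using (xor x, -1) == -(x + 1): sub y, (xor x, -1) == (x + y) + 1.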
3907 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3908 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3909 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3910 }
3911
3912 // Hoist one-use addition by non-opaque constant:
3913 // (x + C) - y -> (x - y) + C
3914 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3915 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3916 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3917 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3918 }
3919 // y - (x + C) -> (y - x) - C
3920 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3921 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3922 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3923 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3924 }
3925 // (x - C) - y -> (x - y) - C
3926 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3927 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3928 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3929 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3930 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3931 }
3932 // (C - x) - y -> C - (x + y)
3933 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3934 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3935 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3936 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3937 }
3938
3939 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3940 // rather than 'sub 0/1' (the sext should get folded).
3941 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3942 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3943 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3944 TLI.getBooleanContents(VT) ==
3945 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3946 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3947 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3948 }
3949
3950 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
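// Y is 0 for non-negative X and -1 for negative X, so (X ^ Y) - Y is either
// X or (~X) + 1 == -X, i.e. abs(X).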
3951 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3952 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3953 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3954 SDValue S0 = N1.getOperand(0);
3955 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3956 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3957 if (C->getAPIntValue() == (BitWidth - 1))
3958 return DAG.getNode(ISD::ABS, DL, VT, S0);
3959 }
3960 }
3961
3962 // If the relocation model supports it, consider symbol offsets.
3963 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3964 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3965 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3966 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3967 if (GA->getGlobal() == GB->getGlobal())
3968 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3969 DL, VT);
3970 }
3971
3972 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3973 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3974 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3975 if (TN->getVT() == MVT::i1) {
3976 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3977 DAG.getConstant(1, DL, VT));
3978 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3979 }
3980 }
3981
3982 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3983 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3984 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3985 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3986 }
3987
3988 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3989 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3990 APInt NewStep = -N1.getConstantOperandAPInt(0);
3991 return DAG.getNode(ISD::ADD, DL, VT, N0,
3992 DAG.getStepVector(DL, VT, NewStep));
3993 }
3994
3995 // Prefer an add for more folding potential and possibly better codegen:
3996 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3997 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3998 SDValue ShAmt = N1.getOperand(1);
3999 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4000 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4001 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4002 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4003 }
4004 }
4005
4006 // As with the previous fold, prefer add for more folding potential.
4007 // Subtracting SMIN/0 is the same as adding SMIN/0:
4008 // N0 - (X << BW-1) --> N0 + (X << BW-1)
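// (X << BW-1) is either 0 or the minimum signed value, and both of those are
// their own negation in two's complement, so sub and add agree.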
4009 if (N1.getOpcode() == ISD::SHL) {
4010 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4011 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4012 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4013 }
4014
4015 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4016 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4017 N0.getResNo() == 0 && N0.hasOneUse())
4018 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4019 N0.getOperand(0), N1, N0.getOperand(2));
4020
4022 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4023 if (SDValue Carry = getAsCarry(TLI, N0)) {
4024 SDValue X = N1;
4025 SDValue Zero = DAG.getConstant(0, DL, VT);
4026 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4027 return DAG.getNode(ISD::UADDO_CARRY, DL,
4028 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4029 Carry);
4030 }
4031 }
4032
4033 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4034 // sub C0, X --> xor X, C0
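// If subtracting every bit that X might have set from C0 cannot borrow, the
// subtraction only clears bits of C0, which is exactly what the xor computes.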
4035 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4036 if (!C0->isOpaque()) {
4037 const APInt &C0Val = C0->getAPIntValue();
4038 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4039 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4040 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4041 }
4042 }
4043
4044 // smax(a,b) - smin(a,b) --> abds(a,b)
4045 if (hasOperation(ISD::ABDS, VT) &&
4046 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4047 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4048 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4049
4050 // umax(a,b) - umin(a,b) --> abdu(a,b)
4051 if (hasOperation(ISD::ABDU, VT) &&
4052 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4053 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4054 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4055
4056 return SDValue();
4057}
4058
4059SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4060 unsigned Opcode = N->getOpcode();
4061 SDValue N0 = N->getOperand(0);
4062 SDValue N1 = N->getOperand(1);
4063 EVT VT = N0.getValueType();
4064 bool IsSigned = Opcode == ISD::SSUBSAT;
4065 SDLoc DL(N);
4066
4067 // fold (sub_sat x, undef) -> 0
4068 if (N0.isUndef() || N1.isUndef())
4069 return DAG.getConstant(0, DL, VT);
4070
4071 // fold (sub_sat x, x) -> 0
4072 if (N0 == N1)
4073 return DAG.getConstant(0, DL, VT);
4074
4075 // fold (sub_sat c1, c2) -> c3
4076 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4077 return C;
4078
4079 // fold vector ops
4080 if (VT.isVector()) {
4081 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4082 return FoldedVOp;
4083
4084 // fold (sub_sat x, 0) -> x, vector edition
4086 return N0;
4087 }
4088
4089 // fold (sub_sat x, 0) -> x
4090 if (isNullConstant(N1))
4091 return N0;
4092
4093 // If it cannot overflow, transform into a sub.
4094 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4095 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4096
4097 return SDValue();
4098}
4099
4100SDValue DAGCombiner::visitSUBC(SDNode *N) {
4101 SDValue N0 = N->getOperand(0);
4102 SDValue N1 = N->getOperand(1);
4103 EVT VT = N0.getValueType();
4104 SDLoc DL(N);
4105
4106 // If the flag result is dead, turn this into an SUB.
4107 if (!N->hasAnyUseOfValue(1))
4108 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4109 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4110
4111 // fold (subc x, x) -> 0 + no borrow
4112 if (N0 == N1)
4113 return CombineTo(N, DAG.getConstant(0, DL, VT),
4114 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4115
4116 // fold (subc x, 0) -> x + no borrow
4117 if (isNullConstant(N1))
4118 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4119
4120 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4121 if (isAllOnesConstant(N0))
4122 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4123 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4124
4125 return SDValue();
4126}
4127
4128SDValue DAGCombiner::visitSUBO(SDNode *N) {
4129 SDValue N0 = N->getOperand(0);
4130 SDValue N1 = N->getOperand(1);
4131 EVT VT = N0.getValueType();
4132 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4133
4134 EVT CarryVT = N->getValueType(1);
4135 SDLoc DL(N);
4136
4137 // If the flag result is dead, turn this into an SUB.
4138 if (!N->hasAnyUseOfValue(1))
4139 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4140 DAG.getUNDEF(CarryVT));
4141
4142 // fold (subo x, x) -> 0 + no borrow
4143 if (N0 == N1)
4144 return CombineTo(N, DAG.getConstant(0, DL, VT),
4145 DAG.getConstant(0, DL, CarryVT));
4146
4147 // fold (subo x, c) -> (addo x, -c)
4148 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4149 if (IsSigned && !N1C->isMinSignedValue())
4150 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4151 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4152
4153 // fold (subo x, 0) -> x + no borrow
4154 if (isNullOrNullSplat(N1))
4155 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4156
4157 // If it cannot overflow, transform into a sub.
4158 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4159 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4160 DAG.getConstant(0, DL, CarryVT));
4161
4162 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4163 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4164 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4165 DAG.getConstant(0, DL, CarryVT));
4166
4167 return SDValue();
4168}
4169
4170SDValue DAGCombiner::visitSUBE(SDNode *N) {
4171 SDValue N0 = N->getOperand(0);
4172 SDValue N1 = N->getOperand(1);
4173 SDValue CarryIn = N->getOperand(2);
4174
4175 // fold (sube x, y, false) -> (subc x, y)
4176 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4177 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4178
4179 return SDValue();
4180}
4181
4182SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4183 SDValue N0 = N->getOperand(0);
4184 SDValue N1 = N->getOperand(1);
4185 SDValue CarryIn = N->getOperand(2);
4186
4187 // fold (usubo_carry x, y, false) -> (usubo x, y)
4188 if (isNullConstant(CarryIn)) {
4189 if (!LegalOperations ||
4190 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4191 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4192 }
4193
4194 return SDValue();
4195}
4196
4197SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4198 SDValue N0 = N->getOperand(0);
4199 SDValue N1 = N->getOperand(1);
4200 SDValue CarryIn = N->getOperand(2);
4201
4202 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4203 if (isNullConstant(CarryIn)) {
4204 if (!LegalOperations ||
4205 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4206 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4207 }
4208
4209 return SDValue();
4210}
4211
4212// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4213// UMULFIXSAT here.
4214SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4215 SDValue N0 = N->getOperand(0);
4216 SDValue N1 = N->getOperand(1);
4217 SDValue Scale = N->getOperand(2);
4218 EVT VT = N0.getValueType();
4219
4220 // fold (mulfix x, undef, scale) -> 0
4221 if (N0.isUndef() || N1.isUndef())
4222 return DAG.getConstant(0, SDLoc(N), VT);
4223
4224 // Canonicalize constant to RHS (vector doesn't have to splat)
4225 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4226 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4227 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4228
4229 // fold (mulfix x, 0, scale) -> 0
4230 if (isNullConstant(N1))
4231 return DAG.getConstant(0, SDLoc(N), VT);
4232
4233 return SDValue();
4234}
4235
4236SDValue DAGCombiner::visitMUL(SDNode *N) {
4237 SDValue N0 = N->getOperand(0);
4238 SDValue N1 = N->getOperand(1);
4239 EVT VT = N0.getValueType();
4240 SDLoc DL(N);
4241
4242 // fold (mul x, undef) -> 0
4243 if (N0.isUndef() || N1.isUndef())
4244 return DAG.getConstant(0, DL, VT);
4245
4246 // fold (mul c1, c2) -> c1*c2
4247 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4248 return C;
4249
4250 // canonicalize constant to RHS (vector doesn't have to splat)
4251 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4252 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4253 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4254
4255 bool N1IsConst = false;
4256 bool N1IsOpaqueConst = false;
4257 APInt ConstValue1;
4258
4259 // fold vector ops
4260 if (VT.isVector()) {
4261 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4262 return FoldedVOp;
4263
4264 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4265 assert((!N1IsConst ||
4266 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4267 "Splat APInt should be element width");
4268 } else {
4269 N1IsConst = isa<ConstantSDNode>(N1);
4270 if (N1IsConst) {
4271 ConstValue1 = N1->getAsAPIntVal();
4272 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4273 }
4274 }
4275
4276 // fold (mul x, 0) -> 0
4277 if (N1IsConst && ConstValue1.isZero())
4278 return N1;
4279
4280 // fold (mul x, 1) -> x
4281 if (N1IsConst && ConstValue1.isOne())
4282 return N0;
4283
4284 if (SDValue NewSel = foldBinOpIntoSelect(N))
4285 return NewSel;
4286
4287 // fold (mul x, -1) -> 0-x
4288 if (N1IsConst && ConstValue1.isAllOnes())
4289 return DAG.getNegative(N0, DL, VT);
4290
4291 // fold (mul x, (1 << c)) -> x << c
4292 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4293 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4294 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4295 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4296 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4297 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4298 }
4299 }
4300
4301 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4302 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4303 unsigned Log2Val = (-ConstValue1).logBase2();
4304 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4305
4306 // FIXME: If the input is something that is easily negated (e.g. a
4307 // single-use add), we should put the negate there.
4308 return DAG.getNode(ISD::SUB, DL, VT,
4309 DAG.getConstant(0, DL, VT),
4310 DAG.getNode(ISD::SHL, DL, VT, N0,
4311 DAG.getConstant(Log2Val, DL, ShiftVT)));
4312 }
4313
4314 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4315 // hi result is in use in case we hit this mid-legalization.
4316 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4317 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4318 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4319 // TODO: Can we match commutable operands with getNodeIfExists?
4320 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4321 if (LoHi->hasAnyUseOfValue(1))
4322 return SDValue(LoHi, 0);
4323 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4324 if (LoHi->hasAnyUseOfValue(1))
4325 return SDValue(LoHi, 0);
4326 }
4327 }
4328
4329 // Try to transform:
4330 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4331 // mul x, (2^N + 1) --> add (shl x, N), x
4332 // mul x, (2^N - 1) --> sub (shl x, N), x
4333 // Examples: x * 33 --> (x << 5) + x
4334 // x * 15 --> (x << 4) - x
4335 // x * -33 --> -((x << 5) + x)
4336 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4337 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4338 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4339 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4340 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4341 // x * 0xf800 --> (x << 16) - (x << 11)
4342 // x * -0x8800 --> -((x << 15) + (x << 11))
4343 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4344 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4345 // TODO: We could handle more general decomposition of any constant by
4346 // having the target set a limit on number of ops and making a
4347 // callback to determine that sequence (similar to sqrt expansion).
4348 unsigned MathOp = ISD::DELETED_NODE;
4349 APInt MulC = ConstValue1.abs();
4350 // The constant `2` should be treated as (2^0 + 1).
4351 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4352 MulC.lshrInPlace(TZeros);
4353 if ((MulC - 1).isPowerOf2())
4354 MathOp = ISD::ADD;
4355 else if ((MulC + 1).isPowerOf2())
4356 MathOp = ISD::SUB;
4357
4358 if (MathOp != ISD::DELETED_NODE) {
4359 unsigned ShAmt =
4360 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4361 ShAmt += TZeros;
4362 assert(ShAmt < VT.getScalarSizeInBits() &&
4363 "multiply-by-constant generated out of bounds shift");
4364 SDValue Shl =
4365 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4366 SDValue R =
4367 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4368 DAG.getNode(ISD::SHL, DL, VT, N0,
4369 DAG.getConstant(TZeros, DL, VT)))
4370 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4371 if (ConstValue1.isNegative())
4372 R = DAG.getNegative(R, DL, VT);
4373 return R;
4374 }
4375 }
4376
4377 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4378 if (N0.getOpcode() == ISD::SHL) {
4379 SDValue N01 = N0.getOperand(1);
4380 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4381 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4382 }
4383
4384 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4385 // use.
4386 {
4387 SDValue Sh, Y;
4388
4389 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4390 if (N0.getOpcode() == ISD::SHL &&
4391 isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4392 Sh = N0; Y = N1;
4393 } else if (N1.getOpcode() == ISD::SHL &&
4394 isConstantOrConstantVector(N1.getOperand(1)) &&
4395 N1->hasOneUse()) {
4396 Sh = N1; Y = N0;
4397 }
4398
4399 if (Sh.getNode()) {
4400 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4401 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4402 }
4403 }
4404
4405 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4406 if (N0.getOpcode() == ISD::ADD &&
4407 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4408 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4409 isMulAddWithConstProfitable(N, N0, N1))
4410 return DAG.getNode(
4411 ISD::ADD, DL, VT,
4412 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4413 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4414
4415 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4416 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4417 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4418 const APInt &C0 = N0.getConstantOperandAPInt(0);
4419 const APInt &C1 = NC1->getAPIntValue();
4420 return DAG.getVScale(DL, VT, C0 * C1);
4421 }
4422
4423 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4424 APInt MulVal;
4425 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4426 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4427 const APInt &C0 = N0.getConstantOperandAPInt(0);
4428 APInt NewStep = C0 * MulVal;
4429 return DAG.getStepVector(DL, VT, NewStep);
4430 }
4431
4432 // Fold (mul x, 0/undef) -> 0 and
4433 // (mul x, 1) -> x
4434 // into and(x, mask).
4435 // We can replace vectors with '0' and '1' factors with a clearing mask.
4436 if (VT.isFixedLengthVector()) {
4437 unsigned NumElts = VT.getVectorNumElements();
4438 SmallBitVector ClearMask;
4439 ClearMask.reserve(NumElts);
4440 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4441 if (!V || V->isZero()) {
4442 ClearMask.push_back(true);
4443 return true;
4444 }
4445 ClearMask.push_back(false);
4446 return V->isOne();
4447 };
4448 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4449 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4450 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4451 EVT LegalSVT = N1.getOperand(0).getValueType();
4452 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4453 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4454 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4455 for (unsigned I = 0; I != NumElts; ++I)
4456 if (ClearMask[I])
4457 Mask[I] = Zero;
4458 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4459 }
4460 }
4461
4462 // reassociate mul
4463 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4464 return RMUL;
4465
4466 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4467 if (SDValue SD =
4468 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4469 return SD;
4470
4471 // Simplify the operands using demanded-bits information.
4472 if (SimplifyDemandedBits(SDValue(N, 0)))
4473 return SDValue(N, 0);
4474
4475 return SDValue();
4476}
4477
4478 /// Return true if divmod libcall is available.
4479 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4480 const TargetLowering &TLI) {
4481 RTLIB::Libcall LC;
4482 EVT NodeType = Node->getValueType(0);
4483 if (!NodeType.isSimple())
4484 return false;
4485 switch (NodeType.getSimpleVT().SimpleTy) {
4486 default: return false; // No libcall for vector types.
4487 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4488 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4489 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4490 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4491 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4492 }
4493
4494 return TLI.getLibcallName(LC) != nullptr;
4495}
4496
4497/// Issue divrem if both quotient and remainder are needed.
4498SDValue DAGCombiner::useDivRem(SDNode *Node) {
4499 if (Node->use_empty())
4500 return SDValue(); // This is a dead node, leave it alone.
4501
4502 unsigned Opcode = Node->getOpcode();
4503 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4504 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4505
4506 // DivMod lib calls can still work on non-legal types if using lib-calls.
4507 EVT VT = Node->getValueType(0);
4508 if (VT.isVector() || !VT.isInteger())
4509 return SDValue();
4510
4511 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4512 return SDValue();
4513
4514 // If DIVREM is going to get expanded into a libcall,
4515 // but there is no libcall available, then don't combine.
4516 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4517 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4518 return SDValue();
4519
4520 // If div is legal, it's better to do the normal expansion
4521 unsigned OtherOpcode = 0;
4522 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4523 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4524 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4525 return SDValue();
4526 } else {
4527 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4528 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4529 return SDValue();
4530 }
4531
4532 SDValue Op0 = Node->getOperand(0);
4533 SDValue Op1 = Node->getOperand(1);
4534 SDValue combined;
4535 for (SDNode *User : Op0->uses()) {
4536 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4537 User->use_empty())
4538 continue;
4539 // Convert the other matching node(s), too;
4540 // otherwise, the DIVREM may get target-legalized into something
4541 // target-specific that we won't be able to recognize.
4542 unsigned UserOpc = User->getOpcode();
4543 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4544 User->getOperand(0) == Op0 &&
4545 User->getOperand(1) == Op1) {
4546 if (!combined) {
4547 if (UserOpc == OtherOpcode) {
4548 SDVTList VTs = DAG.getVTList(VT, VT);
4549 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4550 } else if (UserOpc == DivRemOpc) {
4551 combined = SDValue(User, 0);
4552 } else {
4553 assert(UserOpc == Opcode);
4554 continue;
4555 }
4556 }
4557 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4558 CombineTo(User, combined);
4559 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4560 CombineTo(User, combined.getValue(1));
4561 }
4562 }
4563 return combined;
4564}
4565
4566 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4567 SDValue N0 = N->getOperand(0);
4568 SDValue N1 = N->getOperand(1);
4569 EVT VT = N->getValueType(0);
4570 SDLoc DL(N);
4571
4572 unsigned Opc = N->getOpcode();
4573 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4574 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4575
4576 // X / undef -> undef
4577 // X % undef -> undef
4578 // X / 0 -> undef
4579 // X % 0 -> undef
4580 // NOTE: This includes vectors where any divisor element is zero/undef.
4581 if (DAG.isUndef(Opc, {N0, N1}))
4582 return DAG.getUNDEF(VT);
4583
4584 // undef / X -> 0
4585 // undef % X -> 0
4586 if (N0.isUndef())
4587 return DAG.getConstant(0, DL, VT);
4588
4589 // 0 / X -> 0
4590 // 0 % X -> 0
4591 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4592 if (N0C && N0C->isZero())
4593 return N0;
4594
4595 // X / X -> 1
4596 // X % X -> 0
4597 if (N0 == N1)
4598 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4599
4600 // X / 1 -> X
4601 // X % 1 -> 0
4602 // If this is a boolean op (single-bit element type), we can't have
4603 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4604 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4605 // it's a 1.
4606 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4607 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4608
4609 return SDValue();
4610}
4611
4612SDValue DAGCombiner::visitSDIV(SDNode *N) {
4613 SDValue N0 = N->getOperand(0);
4614 SDValue N1 = N->getOperand(1);
4615 EVT VT = N->getValueType(0);
4616 EVT CCVT = getSetCCResultType(VT);
4617 SDLoc DL(N);
4618
4619 // fold (sdiv c1, c2) -> c1/c2
4620 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4621 return C;
4622
4623 // fold vector ops
4624 if (VT.isVector())
4625 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4626 return FoldedVOp;
4627
4628 // fold (sdiv X, -1) -> 0-X
4629 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4630 if (N1C && N1C->isAllOnes())
4631 return DAG.getNegative(N0, DL, VT);
4632
4633 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4634 if (N1C && N1C->isMinSignedValue())
4635 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4636 DAG.getConstant(1, DL, VT),
4637 DAG.getConstant(0, DL, VT));
4638
4639 if (SDValue V = simplifyDivRem(N, DAG))
4640 return V;
4641
4642 if (SDValue NewSel = foldBinOpIntoSelect(N))
4643 return NewSel;
4644
4645 // If we know the sign bits of both operands are zero, strength reduce to a
4646 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4647 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4648 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4649
4650 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4651 // If the corresponding remainder node exists, update its users with
4652 // (Dividend - (Quotient * Divisor)).
4653 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4654 { N0, N1 })) {
4655 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4656 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4657 AddToWorklist(Mul.getNode());
4658 AddToWorklist(Sub.getNode());
4659 CombineTo(RemNode, Sub);
4660 }
4661 return V;
4662 }
4663
4664 // sdiv, srem -> sdivrem
4665 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4666 // true. Otherwise, we break the simplification logic in visitREM().
4667 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4668 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4669 if (SDValue DivRem = useDivRem(N))
4670 return DivRem;
4671
4672 return SDValue();
4673}
4674
4675static bool isDivisorPowerOfTwo(SDValue Divisor) {
4676 // Helper for determining whether a value is a power-2 constant scalar or a
4677 // vector of such elements.
4678 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4679 if (C->isZero() || C->isOpaque())
4680 return false;
4681 if (C->getAPIntValue().isPowerOf2())
4682 return true;
4683 if (C->getAPIntValue().isNegatedPowerOf2())
4684 return true;
4685 return false;
4686 };
4687
4688 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4689}
4690
4691SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4692 SDLoc DL(N);
4693 EVT VT = N->getValueType(0);
4694 EVT CCVT = getSetCCResultType(VT);
4695 unsigned BitWidth = VT.getScalarSizeInBits();
4696
4697 // fold (sdiv X, pow2) -> simple ops after legalize
4698 // FIXME: We check for the exact bit here because the generic lowering gives
4699 // better results in that case. The target-specific lowering should learn how
4700 // to handle exact sdivs efficiently.
4701 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4702 // Target-specific implementation of sdiv x, pow2.
4703 if (SDValue Res = BuildSDIVPow2(N))
4704 return Res;
4705
4706 // Create constants that are functions of the shift amount value.
4707 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4708 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4709 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4710 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4711 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4712 if (!isConstantOrConstantVector(Inexact))
4713 return SDValue();
4714
4715 // Splat the sign bit into the register
4716 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4717 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4718 AddToWorklist(Sign.getNode());
4719
4720 // Add (N0 < 0) ? abs2 - 1 : 0;
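// For a negative dividend this adds |divisor| - 1 before the arithmetic
// shift, so the shift rounds toward zero instead of toward negative infinity.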
4721 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4722 AddToWorklist(Srl.getNode());
4723 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4724 AddToWorklist(Add.getNode());
4725 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4726 AddToWorklist(Sra.getNode());
4727
4728 // Special case: (sdiv X, 1) -> X
4729 // Special Case: (sdiv X, -1) -> 0-X
4730 SDValue One = DAG.getConstant(1, DL, VT);
4731 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4732 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4733 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4734 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4735 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4736
4737 // If dividing by a positive value, we're done. Otherwise, the result must
4738 // be negated.
4739 SDValue Zero = DAG.getConstant(0, DL, VT);
4740 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4741
4742 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4743 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4744 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4745 return Res;
4746 }
4747
4748 // If integer divide is expensive and we satisfy the requirements, emit an
4749 // alternate sequence. Targets may check function attributes for size/speed
4750 // trade-offs.
4751 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4752 if (isConstantOrConstantVector(N1) &&
4753 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4754 if (SDValue Op = BuildSDIV(N))
4755 return Op;
4756
4757 return SDValue();
4758}
4759
4760SDValue DAGCombiner::visitUDIV(SDNode *N) {
4761 SDValue N0 = N->getOperand(0);
4762 SDValue N1 = N->getOperand(1);
4763 EVT VT = N->getValueType(0);
4764 EVT CCVT = getSetCCResultType(VT);
4765 SDLoc DL(N);
4766
4767 // fold (udiv c1, c2) -> c1/c2
4768 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4769 return C;
4770
4771 // fold vector ops
4772 if (VT.isVector())
4773 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4774 return FoldedVOp;
4775
4776 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4777 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4777 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4778 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4779 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4780 DAG.getConstant(1, DL, VT),
4781 DAG.getConstant(0, DL, VT));
4782 }
4783
4784 if (SDValue V = simplifyDivRem(N, DAG))
4785 return V;
4786
4787 if (SDValue NewSel = foldBinOpIntoSelect(N))
4788 return NewSel;
4789
4790 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4791 // If the corresponding remainder node exists, update its users with
4792 // (Dividend - (Quotient * Divisor)).
4793 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4794 { N0, N1 })) {
4795 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4796 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4797 AddToWorklist(Mul.getNode());
4798 AddToWorklist(Sub.getNode());
4799 CombineTo(RemNode, Sub);
4800 }
4801 return V;
4802 }
4803
4804 // udiv, urem -> udivrem
4805 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4806 // true. Otherwise, we break the simplification logic in visitREM().
4807 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4808 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4809 if (SDValue DivRem = useDivRem(N))
4810 return DivRem;
4811
4812 return SDValue();
4813}
4814
4815SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4816 SDLoc DL(N);
4817 EVT VT = N->getValueType(0);
4818
4819 // fold (udiv x, (1 << c)) -> x >>u c
4820 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4821 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4822 AddToWorklist(LogBase2.getNode());
4823
4824 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4825 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4826 AddToWorklist(Trunc.getNode());
4827 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4828 }
4829 }
4830
4831 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4832 if (N1.getOpcode() == ISD::SHL) {
4833 SDValue N10 = N1.getOperand(0);
4834 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4835 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4836 AddToWorklist(LogBase2.getNode());
4837
4838 EVT ADDVT = N1.getOperand(1).getValueType();
4839 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4840 AddToWorklist(Trunc.getNode());
4841 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4842 AddToWorklist(Add.getNode());
4843 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4844 }
4845 }
4846 }
4847
4848 // fold (udiv x, c) -> alternate
4849 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4850 if (isConstantOrConstantVector(N1) &&
4851 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4852 if (SDValue Op = BuildUDIV(N))
4853 return Op;
4854
4855 return SDValue();
4856}
4857
4858SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4859 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4860 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4861 // Target-specific implementation of srem x, pow2.
4862 if (SDValue Res = BuildSREMPow2(N))
4863 return Res;
4864 }
4865 return SDValue();
4866}
4867
4868// handles ISD::SREM and ISD::UREM
4869SDValue DAGCombiner::visitREM(SDNode *N) {
4870 unsigned Opcode = N->getOpcode();
4871 SDValue N0 = N->getOperand(0);
4872 SDValue N1 = N->getOperand(1);
4873 EVT VT = N->getValueType(0);
4874 EVT CCVT = getSetCCResultType(VT);
4875
4876 bool isSigned = (Opcode == ISD::SREM);
4877 SDLoc DL(N);
4878
4879 // fold (rem c1, c2) -> c1%c2
4880 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4881 return C;
4882
4883 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4884 // Freeze the numerator to avoid a miscompile with an undefined value.
4885 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4886 CCVT.isVector() == VT.isVector()) {
4887 SDValue F0 = DAG.getFreeze(N0);
4888 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4889 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4890 }
4891
4892 if (SDValue V = simplifyDivRem(N, DAG))
4893 return V;
4894
4895 if (SDValue NewSel = foldBinOpIntoSelect(N))
4896 return NewSel;
4897
4898 if (isSigned) {
4899 // If we know the sign bits of both operands are zero, strength reduce to a
4900 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4901 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4902 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4903 } else {
4904 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4905 // fold (urem x, pow2) -> (and x, pow2-1)
4906 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4907 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4908 AddToWorklist(Add.getNode());
4909 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4910 }
4911 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4912 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4913 // TODO: We should sink the following into isKnownToBePowerOfTwo
4914 // using an OrZero parameter analogous to our handling in ValueTracking.
4915 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4916 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4917 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4918 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4919 AddToWorklist(Add.getNode());
4920 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4921 }
4922 }
4923
4923
4924 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4925
4926 // If X/C can be simplified by the division-by-constant logic, lower
4927 // X%C to the equivalent of X-X/C*C.
4928 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4929 // speculative DIV must not cause a DIVREM conversion. We guard against this
4930 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4931 // combine will not return a DIVREM. Regardless, checking cheapness here
4932 // makes sense since the simplification results in fatter code.
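// For example, (urem X, 10) is lowered as X - (X /u 10) * 10, where the
// division reuses the cheaper constant-divide expansion built below (for a
// non-power-of-two constant this is typically BuildUDIV's
// multiply-by-magic-constant sequence).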
4933 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4934 if (isSigned) {
4935 // check if we can build faster implementation for srem
4936 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4937 return OptimizedRem;
4938 }
4939
4940 SDValue OptimizedDiv =
4941 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4942 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4943 // If the equivalent Div node also exists, update its users.
4944 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4945 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4946 { N0, N1 }))
4947 CombineTo(DivNode, OptimizedDiv);
4948 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4949 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4950 AddToWorklist(OptimizedDiv.getNode());
4951 AddToWorklist(Mul.getNode());
4952 return Sub;
4953 }
4954 }
4955
4956 // sdiv, srem -> sdivrem
4957 if (SDValue DivRem = useDivRem(N))
4958 return DivRem.getValue(1);
4959
4960 return SDValue();
4961}
4962
4963SDValue DAGCombiner::visitMULHS(SDNode *N) {
4964 SDValue N0 = N->getOperand(0);
4965 SDValue N1 = N->getOperand(1);
4966 EVT VT = N->getValueType(0);
4967 SDLoc DL(N);
4968
4969 // fold (mulhs c1, c2)
4970 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4971 return C;
4972
4973 // canonicalize constant to RHS.
4974 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4975 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4976 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4977
4978 if (VT.isVector()) {
4979 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4980 return FoldedVOp;
4981
4982 // fold (mulhs x, 0) -> 0
4983 // do not return N1, because undef node may exist.
4984 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4985 return DAG.getConstant(0, DL, VT);
4986 }
4987
4988 // fold (mulhs x, 0) -> 0
4989 if (isNullConstant(N1))
4990 return N1;
4991
4992 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4993 if (isOneConstant(N1))
4994 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4995 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4996 getShiftAmountTy(N0.getValueType())));
4997
4998 // fold (mulhs x, undef) -> 0
4999 if (N0.isUndef() || N1.isUndef())
5000 return DAG.getConstant(0, DL, VT);
5001
5002 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5003 // plus a shift.
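// e.g. for i16 when i32 MUL is legal:
//   (mulhs x, y) -> (trunc (srl (mul (sext x), (sext y)), 16))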
5004 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5005 !VT.isVector()) {
5006 MVT Simple = VT.getSimpleVT();
5007 unsigned SimpleSize = Simple.getSizeInBits();
5008 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5009 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5010 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5011 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5012 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5013 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5014 DAG.getConstant(SimpleSize, DL,
5015 getShiftAmountTy(N1.getValueType())));
5016 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5017 }
5018 }
5019
5020 return SDValue();
5021}
5022
5023SDValue DAGCombiner::visitMULHU(SDNode *N) {
5024 SDValue N0 = N->getOperand(0);
5025 SDValue N1 = N->getOperand(1);
5026 EVT VT = N->getValueType(0);
5027 SDLoc DL(N);
5028
5029 // fold (mulhu c1, c2)
5030 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5031 return C;
5032
5033 // canonicalize constant to RHS.
5034 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5035 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5036 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5037
5038 if (VT.isVector()) {
5039 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5040 return FoldedVOp;
5041
5042 // fold (mulhu x, 0) -> 0
5043 // do not return N1, because undef node may exist.
5044 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5045 return DAG.getConstant(0, DL, VT);
5046 }
5047
5048 // fold (mulhu x, 0) -> 0
5049 if (isNullConstant(N1))
5050 return N1;
5051
5052 // fold (mulhu x, 1) -> 0
5053 if (isOneConstant(N1))
5054 return DAG.getConstant(0, DL, N0.getValueType());
5055
5056 // fold (mulhu x, undef) -> 0
5057 if (N0.isUndef() || N1.isUndef())
5058 return DAG.getConstant(0, DL, VT);
5059
5060 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
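// e.g. for i32: (mulhu x, 16) -> (srl x, 28), since the high 32 bits of
// x * 2^4 equal x >> (32 - 4).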
5061 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5062 hasOperation(ISD::SRL, VT)) {
5063 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5064 unsigned NumEltBits = VT.getScalarSizeInBits();
5065 SDValue SRLAmt = DAG.getNode(
5066 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5067 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5068 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5069 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5070 }
5071 }
5072
5073 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5074 // plus a shift.
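// e.g. for i16 when i32 MUL is legal:
//   (mulhu x, y) -> (trunc (srl (mul (zext x), (zext y)), 16))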
5075 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5076 !VT.isVector()) {
5077 MVT Simple = VT.getSimpleVT();
5078 unsigned SimpleSize = Simple.getSizeInBits();
5079 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5080 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5081 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5082 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5083 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5084 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5085 DAG.getConstant(SimpleSize, DL,
5086 getShiftAmountTy(N1.getValueType())));
5087 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5088 }
5089 }
5090
5091 // Simplify the operands using demanded-bits information.
5092 // We don't have demanded bits support for MULHU so this just enables constant
5093 // folding based on known bits.
5094 if (SimplifyDemandedBits(SDValue(N, 0)))
5095 return SDValue(N, 0);
5096
5097 return SDValue();
5098}
5099
5100SDValue DAGCombiner::visitAVG(SDNode *N) {
5101 unsigned Opcode = N->getOpcode();
5102 SDValue N0 = N->getOperand(0);
5103 SDValue N1 = N->getOperand(1);
5104 EVT VT = N->getValueType(0);
5105 SDLoc DL(N);
5106
5107 // fold (avg c1, c2)
5108 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5109 return C;
5110
5111 // canonicalize constant to RHS.
5112 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5113 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5114 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5115
5116 if (VT.isVector()) {
5117 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5118 return FoldedVOp;
5119
5120 // fold (avgfloor x, 0) -> x >> 1
5121 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5122 if (Opcode == ISD::AVGFLOORS)
5123 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5124 if (Opcode == ISD::AVGFLOORU)
5125 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5126 }
5127 }
5128
5129 // fold (avg x, undef) -> x
5130 if (N0.isUndef())
5131 return N1;
5132 if (N1.isUndef())
5133 return N0;
5134
5135 // Fold (avg x, x) --> x
5136 if (N0 == N1 && Level >= AfterLegalizeTypes)
5137 return N0;
5138
5139 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5140
5141 return SDValue();
5142}
5143
5144SDValue DAGCombiner::visitABD(SDNode *N) {
5145 unsigned Opcode = N->getOpcode();
5146 SDValue N0 = N->getOperand(0);
5147 SDValue N1 = N->getOperand(1);
5148 EVT VT = N->getValueType(0);
5149 SDLoc DL(N);
5150
5151 // fold (abd c1, c2)
5152 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5153 return C;
5154
5155 // canonicalize constant to RHS.
5156 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5157 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5158 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5159
5160 if (VT.isVector()) {
5161 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5162 return FoldedVOp;
5163
5164 // fold (abds x, 0) -> abs x
5165 // fold (abdu x, 0) -> x
5167 if (Opcode == ISD::ABDS)
5168 return DAG.getNode(ISD::ABS, DL, VT, N0);
5169 if (Opcode == ISD::ABDU)
5170 return N0;
5171 }
5172 }
5173
5174 // fold (abd x, undef) -> 0
5175 if (N0.isUndef() || N1.isUndef())
5176 return DAG.getConstant(0, DL, VT);
5177
5178 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5179 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5180 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5181 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5182
5183 return SDValue();
5184}
5185
5186/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5187/// give the opcodes for the two computations that are being performed. Return
5188/// true if a simplification was made.
5189SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5190 unsigned HiOp) {
5191 // If the high half is not needed, just compute the low half.
5192 bool HiExists = N->hasAnyUseOfValue(1);
5193 if (!HiExists && (!LegalOperations ||
5194 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5195 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5196 return CombineTo(N, Res, Res);
5197 }
5198
5199 // If the low half is not needed, just compute the high half.
5200 bool LoExists = N->hasAnyUseOfValue(0);
5201 if (!LoExists && (!LegalOperations ||
5202 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5203 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5204 return CombineTo(N, Res, Res);
5205 }
5206
5207 // If both halves are used, return as it is.
5208 if (LoExists && HiExists)
5209 return SDValue();
5210
5211 // If the two computed results can be simplified separately, separate them.
5212 if (LoExists) {
5213 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5214 AddToWorklist(Lo.getNode());
5215 SDValue LoOpt = combine(Lo.getNode());
5216 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5217 (!LegalOperations ||
5218 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5219 return CombineTo(N, LoOpt, LoOpt);
5220 }
5221
5222 if (HiExists) {
5223 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5224 AddToWorklist(Hi.getNode());
5225 SDValue HiOpt = combine(Hi.getNode());
5226 if (HiOpt.getNode() && HiOpt != Hi &&
5227 (!LegalOperations ||
5228 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5229 return CombineTo(N, HiOpt, HiOpt);
5230 }
5231
5232 return SDValue();
5233}
5234
5235SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5236 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5237 return Res;
5238
5239 SDValue N0 = N->getOperand(0);
5240 SDValue N1 = N->getOperand(1);
5241 EVT VT = N->getValueType(0);
5242 SDLoc DL(N);
5243
5244 // Constant fold.
5245 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5246 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5247
5248 // canonicalize constant to RHS (vector doesn't have to splat)
5249 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5250 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5251 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5252
5253 // If the type twice as wide is legal, transform the mul_lohi to a wider
5254 // multiply plus a shift.
5255 if (VT.isSimple() && !VT.isVector()) {
5256 MVT Simple = VT.getSimpleVT();
5257 unsigned SimpleSize = Simple.getSizeInBits();
5258 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5259 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5260 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5261 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5262 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5263 // Compute the high half of the result (result value 1).
5264 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5265 DAG.getConstant(SimpleSize, DL,
5266 getShiftAmountTy(Lo.getValueType())));
5267 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5268 // Compute the low half of the result (result value 0).
5269 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5270 return CombineTo(N, Lo, Hi);
5271 }
5272 }
5273
5274 return SDValue();
5275}
5276
5277SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5278 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5279 return Res;
5280
5281 SDValue N0 = N->getOperand(0);
5282 SDValue N1 = N->getOperand(1);
5283 EVT VT = N->getValueType(0);
5284 SDLoc DL(N);
5285
5286 // Constant fold.
5287 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5288 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5289
5290 // canonicalize constant to RHS (vector doesn't have to splat)
5291 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5292 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5293 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5294
5295 // (umul_lohi N0, 0) -> (0, 0)
5296 if (isNullConstant(N1)) {
5297 SDValue Zero = DAG.getConstant(0, DL, VT);
5298 return CombineTo(N, Zero, Zero);
5299 }
5300
5301 // (umul_lohi N0, 1) -> (N0, 0)
5302 if (isOneConstant(N1)) {
5303 SDValue Zero = DAG.getConstant(0, DL, VT);
5304 return CombineTo(N, N0, Zero);
5305 }
5306
5307 // If the type twice as wide is legal, transform the mul_lohi to a wider
5308 // multiply plus a shift.
5309 if (VT.isSimple() && !VT.isVector()) {
5310 MVT Simple = VT.getSimpleVT();
5311 unsigned SimpleSize = Simple.getSizeInBits();
5312 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5313 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5314 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5315 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5316 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5318 // Compute the high half of the result (result value 1).
5318 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5319 DAG.getConstant(SimpleSize, DL,
5320 getShiftAmountTy(Lo.getValueType())));
5321 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5323 // Compute the low half of the result (result value 0).
5323 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5324 return CombineTo(N, Lo, Hi);
5325 }
5326 }
5327
5328 return SDValue();
5329}
5330
5331SDValue DAGCombiner::visitMULO(SDNode *N) {
5332 SDValue N0 = N->getOperand(0);
5333 SDValue N1 = N->getOperand(1);
5334 EVT VT = N0.getValueType();
5335 bool IsSigned = (ISD::SMULO == N->getOpcode());
5336
5337 EVT CarryVT = N->getValueType(1);
5338 SDLoc DL(N);
5339
5340 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5341 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5342
5343 // fold operation with constant operands.
5344 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5345 // multiple results.
5346 if (N0C && N1C) {
5347 bool Overflow;
5348 APInt Result =
5349 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5350 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5351 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5352 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5353 }
5354
5355 // canonicalize constant to RHS.
5356 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5357 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5358 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5359
5360 // fold (mulo x, 0) -> 0 + no carry out
5361 if (isNullOrNullSplat(N1))
5362 return CombineTo(N, DAG.getConstant(0, DL, VT),
5363 DAG.getConstant(0, DL, CarryVT));
5364
5365 // (mulo x, 2) -> (addo x, x)
5366 // FIXME: This needs a freeze.
5367 if (N1C && N1C->getAPIntValue() == 2 &&
5368 (!IsSigned || VT.getScalarSizeInBits() > 2))
5369 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5370 N->getVTList(), N0, N0);
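// (For i2, the constant 2 has the same bit pattern as -2, so a signed multiply
// by it is not equivalent to x + x; the scalar-size check above excludes that
// case.)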
5371
5372 // A 1 bit SMULO overflows if both inputs are 1.
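// In i1 the only non-zero value is -1, and -1 * -1 = +1 is not representable,
// so the product overflows exactly when both bits are set.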
5373 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5374 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5375 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5376 DAG.getConstant(0, DL, VT), ISD::SETNE);
5377 return CombineTo(N, And, Cmp);
5378 }
5379
5380 // If it cannot overflow, transform into a mul.
5381 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5382 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5383 DAG.getConstant(0, DL, CarryVT));
5384 return SDValue();
5385}
5386
5387// Function to calculate whether the Min/Max pair of SDNodes (potentially
5388// swapped around) make a signed saturate pattern, clamping to between a signed
5389 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5390// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5391// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5392// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5393 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5394 SDValue N3, ISD::CondCode CC, unsigned &BW,
5395 bool &Unsigned, SelectionDAG &DAG) {
5396 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5397 ISD::CondCode CC) {
5398 // The compare and select operand should be the same or the select operands
5399 // should be truncated versions of the comparison.
5400 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5401 return 0;
5402 // The constants need to be the same or a truncated version of each other.
5403 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5404 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5405 if (!N1C || !N3C)
5406 return 0;
5407 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5408 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5409 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5410 return 0;
5411 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5412 };
5413
5414 // Check the initial value is a SMIN/SMAX equivalent.
5415 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5416 if (!Opcode0)
5417 return SDValue();
5418
5419 // We could only need one range check, if the fptosi could never produce
5420 // the upper value.
5421 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5422 if (isNullOrNullSplat(N3)) {
5423 EVT IntVT = N0.getValueType().getScalarType();
5424 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5425 if (FPVT.isSimple()) {
5426 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5427 const fltSemantics &Semantics = InputTy->getFltSemantics();
5428 uint32_t MinBitWidth =
5429 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5430 if (IntVT.getSizeInBits() >= MinBitWidth) {
5431 Unsigned = true;
5432 BW = PowerOf2Ceil(MinBitWidth);
5433 return N0;
5434 }
5435 }
5436 }
5437 }
5438
5439 SDValue N00, N01, N02, N03;
5440 ISD::CondCode N0CC;
5441 switch (N0.getOpcode()) {
5442 case ISD::SMIN:
5443 case ISD::SMAX:
5444 N00 = N02 = N0.getOperand(0);
5445 N01 = N03 = N0.getOperand(1);
5446 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5447 break;
5448 case ISD::SELECT_CC:
5449 N00 = N0.getOperand(0);
5450 N01 = N0.getOperand(1);
5451 N02 = N0.getOperand(2);
5452 N03 = N0.getOperand(3);
5453 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5454 break;
5455 case ISD::SELECT:
5456 case ISD::VSELECT:
5457 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5458 return SDValue();
5459 N00 = N0.getOperand(0).getOperand(0);
5460 N01 = N0.getOperand(0).getOperand(1);
5461 N02 = N0.getOperand(1);
5462 N03 = N0.getOperand(2);
5463 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5464 break;
5465 default:
5466 return SDValue();
5467 }
5468
5469 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5470 if (!Opcode1 || Opcode0 == Opcode1)
5471 return SDValue();
5472
5473 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5474 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5475 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5476 return SDValue();
5477
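// For example, smin(smax(x, -128), 127) (in either order) gives BW = 8 with
// Unsigned = false, and smin(smax(x, 0), 255) gives BW = 8 with Unsigned = true.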
5478 const APInt &MinC = MinCOp->getAPIntValue();
5479 const APInt &MaxC = MaxCOp->getAPIntValue();
5480 APInt MinCPlus1 = MinC + 1;
5481 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5482 BW = MinCPlus1.exactLogBase2() + 1;
5483 Unsigned = false;
5484 return N02;
5485 }
5486
5487 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5488 BW = MinCPlus1.exactLogBase2();
5489 Unsigned = true;
5490 return N02;
5491 }
5492
5493 return SDValue();
5494}
5495
5495
5496 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5497 SDValue N3, ISD::CondCode CC,
5498 SelectionDAG &DAG) {
5499 unsigned BW;
5500 bool Unsigned;
5501 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5502 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5503 return SDValue();
5504 EVT FPVT = Fp.getOperand(0).getValueType();
5505 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5506 if (FPVT.isVector())
5507 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5508 FPVT.getVectorElementCount());
5509 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5510 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5511 return SDValue();
5512 SDLoc DL(Fp);
5513 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5514 DAG.getValueType(NewVT.getScalarType()));
5515 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5516}
5517
5517
5518 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5519 SDValue N3, ISD::CondCode CC,
5520 SelectionDAG &DAG) {
5521 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5522 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5523 // be truncated versions of the setcc (N0/N1).
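// e.g. (umin (fp_to_uint x), 255) -> (zext (fp_to_uint_sat x, i8)) when the
// target prefers the saturating conversion.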
5524 if ((N0 != N2 &&
5525 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5526 N0.getOpcode() != ISD::FP_TO_UINT)
5527 return SDValue();
5528 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5529 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5530 if (!N1C || !N3C)
5531 return SDValue();
5532 const APInt &C1 = N1C->getAPIntValue();
5533 const APInt &C3 = N3C->getAPIntValue();
5534 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5535 C1 != C3.zext(C1.getBitWidth()))
5536 return SDValue();
5537
5538 unsigned BW = (C1 + 1).exactLogBase2();
5539 EVT FPVT = N0.getOperand(0).getValueType();
5540 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5541 if (FPVT.isVector())
5542 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5543 FPVT.getVectorElementCount());
5544 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5545 FPVT, NewVT))
5546 return SDValue();
5547
5548 SDValue Sat =
5549 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5550 DAG.getValueType(NewVT.getScalarType()));
5551 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5552}
5553
5554SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5555 SDValue N0 = N->getOperand(0);
5556 SDValue N1 = N->getOperand(1);
5557 EVT VT = N0.getValueType();
5558 unsigned Opcode = N->getOpcode();
5559 SDLoc DL(N);
5560
5561 // fold operation with constant operands.
5562 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5563 return C;
5564
5565 // If the operands are the same, this is a no-op.
5566 if (N0 == N1)
5567 return N0;
5568
5569 // canonicalize constant to RHS
5570 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5571 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5572 return DAG.getNode(Opcode, DL, VT, N1, N0);
5573
5574 // fold vector ops
5575 if (VT.isVector())
5576 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5577 return FoldedVOp;
5578
5579 // reassociate minmax
5580 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5581 return RMINMAX;
5582
5583 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5584 // Only do this if:
5585 // 1. The current op isn't legal and the flipped is.
5586 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5587 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5588 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5589 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5590 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5591 unsigned AltOpcode;
5592 switch (Opcode) {
5593 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5594 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5595 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5596 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5597 default: llvm_unreachable("Unknown MINMAX opcode");
5598 }
5599 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5600 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5601 }
5602
5603 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5604 if (SDValue S = PerformMinMaxFpToSatCombine(
5605 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5606 return S;
5607 if (Opcode == ISD::UMIN)
5608 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5609 return S;
5610
5611 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5612 auto ReductionOpcode = [](unsigned Opcode) {
5613 switch (Opcode) {
5614 case ISD::SMIN:
5615 return ISD::VECREDUCE_SMIN;
5616 case ISD::SMAX:
5617 return ISD::VECREDUCE_SMAX;
5618 case ISD::UMIN:
5619 return ISD::VECREDUCE_UMIN;
5620 case ISD::UMAX:
5621 return ISD::VECREDUCE_UMAX;
5622 default:
5623 llvm_unreachable("Unexpected opcode");
5624 }
5625 };
5626 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5627 SDLoc(N), VT, N0, N1))
5628 return SD;
5629
5630 // Simplify the operands using demanded-bits information.
5631 if (SimplifyDemandedBits(SDValue(N, 0)))
5632 return SDValue(N, 0);
5633
5634 return SDValue();
5635}
5636
5637/// If this is a bitwise logic instruction and both operands have the same
5638/// opcode, try to sink the other opcode after the logic instruction.
5639SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5640 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5641 EVT VT = N0.getValueType();
5642 unsigned LogicOpcode = N->getOpcode();
5643 unsigned HandOpcode = N0.getOpcode();
5644 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5645 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5646
5647 // Bail early if none of these transforms apply.
5648 if (N0.getNumOperands() == 0)
5649 return SDValue();
5650
5651 // FIXME: We should check number of uses of the operands to not increase
5652 // the instruction count for all transforms.
5653
5654 // Handle size-changing casts (or sign_extend_inreg).
5655 SDValue X = N0.getOperand(0);
5656 SDValue Y = N1.getOperand(0);
5657 EVT XVT = X.getValueType();
5658 SDLoc DL(N);
5659 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5660 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5661 N0.getOperand(1) == N1.getOperand(1))) {
5662 // If both operands have other uses, this transform would create extra
5663 // instructions without eliminating anything.
5664 if (!N0.hasOneUse() && !N1.hasOneUse())
5665 return SDValue();
5666 // We need matching integer source types.
5667 if (XVT != Y.getValueType())
5668 return SDValue();
5669 // Don't create an illegal op during or after legalization. Don't ever
5670 // create an unsupported vector op.
5671 if ((VT.isVector() || LegalOperations) &&
5672 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5673 return SDValue();
5674 // Avoid infinite looping with PromoteIntBinOp.
5675 // TODO: Should we apply desirable/legal constraints to all opcodes?
5676 if ((HandOpcode == ISD::ANY_EXTEND ||
5677 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5678 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5679 return SDValue();
5680 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5681 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5682 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5683 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5684 return DAG.getNode(HandOpcode, DL, VT, Logic);
5685 }
5686
5687 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5688 if (HandOpcode == ISD::TRUNCATE) {
5689 // If both operands have other uses, this transform would create extra
5690 // instructions without eliminating anything.
5691 if (!N0.hasOneUse() && !N1.hasOneUse())
5692 return SDValue();
5693 // We need matching source types.
5694 if (XVT != Y.getValueType())
5695 return SDValue();
5696 // Don't create an illegal op during or after legalization.
5697 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5698 return SDValue();
5699 // Be extra careful sinking truncate. If it's free, there's no benefit in
5700 // widening a binop. Also, don't create a logic op on an illegal type.
5701 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5702 return SDValue();
5703 if (!TLI.isTypeLegal(XVT))
5704 return SDValue();
5705 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5706 return DAG.getNode(HandOpcode, DL, VT, Logic);
5707 }
5708
5709 // For binops SHL/SRL/SRA/AND:
5710 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5711 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5712 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5713 N0.getOperand(1) == N1.getOperand(1)) {
5714 // If either operand has other uses, this transform is not an improvement.
5715 if (!N0.hasOneUse() || !N1.hasOneUse())
5716 return SDValue();
5717 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5718 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5719 }
5720
5721 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5722 if (HandOpcode == ISD::BSWAP) {
5723 // If either operand has other uses, this transform is not an improvement.
5724 if (!N0.hasOneUse() || !N1.hasOneUse())
5725 return SDValue();
5726 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5727 return DAG.getNode(HandOpcode, DL, VT, Logic);
5728 }
5729
5730 // For funnel shifts FSHL/FSHR:
5731 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5732 // --> OP (logic_op x, y), (logic_op x1, y1), s
5733 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5734 N0.getOperand(2) == N1.getOperand(2)) {
5735 if (!N0.hasOneUse() || !N1.hasOneUse())
5736 return SDValue();
5737 SDValue X1 = N0.getOperand(1);
5738 SDValue Y1 = N1.getOperand(1);
5739 SDValue S = N0.getOperand(2);
5740 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5741 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5742 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5743 }
5744
5745 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5746 // Only perform this optimization up until type legalization, before
5747 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5748 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5749 // we don't want to undo this promotion.
5750 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5751 // on scalars.
5752 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5753 Level <= AfterLegalizeTypes) {
5754 // Input types must be integer and the same.
5755 if (XVT.isInteger() && XVT == Y.getValueType() &&
5756 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5757 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5758 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5759 return DAG.getNode(HandOpcode, DL, VT, Logic);
5760 }
5761 }
5762
5763 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5764 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5765 // If both shuffles use the same mask, and both shuffle within a single
5766 // vector, then it is worthwhile to move the swizzle after the operation.
5767 // The type-legalizer generates this pattern when loading illegal
5768 // vector types from memory. In many cases this allows additional shuffle
5769 // optimizations.
5770 // There are other cases where moving the shuffle after the xor/and/or
5771 // is profitable even if shuffles don't perform a swizzle.
5772 // If both shuffles use the same mask, and both shuffles have the same first
5773 // or second operand, then it might still be profitable to move the shuffle
5774 // after the xor/and/or operation.
5775 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5776 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5777 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5778 assert(X.getValueType() == Y.getValueType() &&
5779 "Inputs to shuffles are not the same type");
5780
5781 // Check that both shuffles use the same mask. The masks are known to be of
5782 // the same length because the result vector type is the same.
5783 // Check also that shuffles have only one use to avoid introducing extra
5784 // instructions.
5785 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5786 !SVN0->getMask().equals(SVN1->getMask()))
5787 return SDValue();
5788
5789 // Don't try to fold this node if it requires introducing a
5790 // build vector of all zeros that might be illegal at this stage.
5791 SDValue ShOp = N0.getOperand(1);
5792 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5793 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5794
5795 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5796 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5797 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5798 N0.getOperand(0), N1.getOperand(0));
5799 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5800 }
5801
5802 // Don't try to fold this node if it requires introducing a
5803 // build vector of all zeros that might be illegal at this stage.
5804 ShOp = N0.getOperand(0);
5805 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5806 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5807
5808 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5809 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5810 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5811 N1.getOperand(1));
5812 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5813 }
5814 }
5815
5816 return SDValue();
5817}
5818
5819/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5820SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5821 const SDLoc &DL) {
5822 SDValue LL, LR, RL, RR, N0CC, N1CC;
5823 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5824 !isSetCCEquivalent(N1, RL, RR, N1CC))
5825 return SDValue();
5826
5827 assert(N0.getValueType() == N1.getValueType() &&
5828 "Unexpected operand types for bitwise logic op");
5829 assert(LL.getValueType() == LR.getValueType() &&
5830 RL.getValueType() == RR.getValueType() &&
5831 "Unexpected operand types for setcc");
5832
5833 // If we're here post-legalization or the logic op type is not i1, the logic
5834 // op type must match a setcc result type. Also, all folds require new
5835 // operations on the left and right operands, so those types must match.
5836 EVT VT = N0.getValueType();
5837 EVT OpVT = LL.getValueType();
5838 if (LegalOperations || VT.getScalarType() != MVT::i1)
5839 if (VT != getSetCCResultType(OpVT))
5840 return SDValue();
5841 if (OpVT != RL.getValueType())
5842 return SDValue();
5843
5844 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5845 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5846 bool IsInteger = OpVT.isInteger();
5847 if (LR == RR && CC0 == CC1 && IsInteger) {
5848 bool IsZero = isNullOrNullSplat(LR);
5849 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5850
5851 // All bits clear?
5852 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5853 // All sign bits clear?
5854 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5855 // Any bits set?
5856 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5857 // Any sign bits set?
5858 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5859
5860 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5861 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5862 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5863 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5864 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5865 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5866 AddToWorklist(Or.getNode());
5867 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5868 }
5869
5870 // All bits set?
5871 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5872 // All sign bits set?
5873 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5874 // Any bits clear?
5875 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5876 // Any sign bits clear?
5877 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5878
5879 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5880 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5881 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5882 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5883 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5884 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5885 AddToWorklist(And.getNode());
5886 return DAG.getSetCC(DL, VT, And, LR, CC1);
5887 }
5888 }
5889
5890 // TODO: What is the 'or' equivalent of this fold?
5891 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
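// X + 1 maps 0 -> 1 and -1 -> 0, so X is neither 0 nor -1 exactly when the
// (wrapping) sum is unsigned-greater-or-equal to 2.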
5892 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5893 IsInteger && CC0 == ISD::SETNE &&
5894 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5895 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5896 SDValue One = DAG.getConstant(1, DL, OpVT);
5897 SDValue Two = DAG.getConstant(2, DL, OpVT);
5898 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5899 AddToWorklist(Add.getNode());
5900 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5901 }
5902
5903 // Try more general transforms if the predicates match and the only user of
5904 // the compares is the 'and' or 'or'.
5905 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5906 N0.hasOneUse() && N1.hasOneUse()) {
5907 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5908 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5909 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5910 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5911 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5912 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5913 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5914 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5915 }
5916
5917 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5918 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5919 // Match a shared variable operand and 2 non-opaque constant operands.
5920 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5921 // The difference of the constants must be a single bit.
5922 const APInt &CMax =
5923 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5924 const APInt &CMin =
5925 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5926 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5927 };
5928 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5929 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5930 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
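// e.g. (X == 8) | (X == 12) --> ((X - 8) & ~4) == 0, since 8 and 12 differ
// in a single bit.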
5931 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5932 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5933 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5934 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5935 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5936 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5937 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5938 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5939 }
5940 }
5941 }
5942
5943 // Canonicalize equivalent operands to LL == RL.
5944 if (LL == RR && LR == RL) {
5945 CC1 = ISD::getSetCCSwappedOperands(CC1);
5946 std::swap(RL, RR);
5947 }
5948
5949 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5950 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5951 if (LL == RL && LR == RR) {
5952 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5953 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5954 if (NewCC != ISD::SETCC_INVALID &&
5955 (!LegalOperations ||
5956 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5957 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5958 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5959 }
5960
5961 return SDValue();
5962}
5963
5964static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
5965 SelectionDAG &DAG) {
5966 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
5967}
5968
5969static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
5970 SelectionDAG &DAG) {
5971 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
5972}
5973
5974static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
5975 ISD::CondCode CC, unsigned OrAndOpcode,
5976 SelectionDAG &DAG,
5977 bool isFMAXNUMFMINNUM_IEEE,
5978 bool isFMAXNUMFMINNUM) {
5979 // The optimization cannot be applied for all the predicates because
5980 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
5981 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
5982 // applied at all if one of the operands is a signaling NaN.
5983
5984 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
5985 // are non NaN values.
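// e.g. (setlt a, x) | (setlt b, x) --> setlt (fminnum_ieee a, b), x when
// a and b are known not to be NaN.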
5986 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
5987 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
5988 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5989 isFMAXNUMFMINNUM_IEEE
5990 ? ISD::FMINNUM_IEEE
5991 : ISD::DELETED_NODE;
5992 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
5993 (OrAndOpcode == ISD::OR)) ||
5994 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
5995 (OrAndOpcode == ISD::AND)))
5996 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5997 isFMAXNUMFMINNUM_IEEE
5998 ? ISD::FMAXNUM_IEEE
5999 : ISD::DELETED_NODE;
6000 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6001 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6002 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6003 // that there are not any sNaNs, then the optimization is not valid
6004 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6005 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6006 // we can prove that we do not have any sNaNs, then we can do the
6007 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6008 // cases.
6009 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6010 (OrAndOpcode == ISD::OR)) ||
6011 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6012 (OrAndOpcode == ISD::AND)))
6013 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6014 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6015 isFMAXNUMFMINNUM_IEEE
6016 ? ISD::FMINNUM_IEEE
6017 : ISD::DELETED_NODE;
6018 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6019 (OrAndOpcode == ISD::OR)) ||
6020 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6021 (OrAndOpcode == ISD::AND)))
6022 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6023 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6024 isFMAXNUMFMINNUM_IEEE
6025 ? ISD::FMAXNUM_IEEE
6026 : ISD::DELETED_NODE;
6027 return ISD::DELETED_NODE;
6028}
6029
6030 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6031 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6032 assert(
6033 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6034 "Invalid Op to combine SETCC with");
6035
6036 // TODO: Search past casts/truncates.
6037 SDValue LHS = LogicOp->getOperand(0);
6038 SDValue RHS = LogicOp->getOperand(1);
6039 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6040 !LHS->hasOneUse() || !RHS->hasOneUse())
6041 return SDValue();
6042
6043 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6044 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6045 LogicOp, LHS.getNode(), RHS.getNode());
6046
6047 SDValue LHS0 = LHS->getOperand(0);
6048 SDValue RHS0 = RHS->getOperand(0);
6049 SDValue LHS1 = LHS->getOperand(1);
6050 SDValue RHS1 = RHS->getOperand(1);
6051 // TODO: We don't actually need a splat here, for vectors we just need the
6052 // invariants to hold for each element.
6053 auto *LHS1C = isConstOrConstSplat(LHS1);
6054 auto *RHS1C = isConstOrConstSplat(RHS1);
6055 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6056 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6057 EVT VT = LogicOp->getValueType(0);
6058 EVT OpVT = LHS0.getValueType();
6059 SDLoc DL(LogicOp);
6060
6061 // Check if the operands of an and/or operation are comparisons and if they
6062 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6063 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6064 // sequence will be replaced with min-cmp sequence:
6065 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6066 // and and-cmp-cmp will be replaced with max-cmp sequence:
6067 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6068 // The optimization does not work for `==` or `!=` .
6069 // The two comparisons should have either the same predicate or the
6070 // predicate of one of the comparisons is the opposite of the other one.
6071 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6072 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6073 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6074 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6075 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6076 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6077 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6078 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6079 (OpVT.isFloatingPoint() &&
6080 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6081 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6082 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6083 CCL != ISD::SETTRUE &&
6084 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6085
6086 SDValue CommonValue, Operand1, Operand2;
6087 ISD::CondCode CC = ISD::SETCC_INVALID;
6088 if (CCL == CCR) {
6089 if (LHS0 == RHS0) {
6090 CommonValue = LHS0;
6091 Operand1 = LHS1;
6092 Operand2 = RHS1;
6093 CC = CCL;
6094 } else if (LHS1 == RHS1) {
6095 CommonValue = LHS1;
6096 Operand1 = LHS0;
6097 Operand2 = RHS0;
6098 CC = CCL;
6099 }
6100 } else {
6101 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6102 if (LHS0 == RHS1) {
6103 CommonValue = LHS0;
6104 Operand1 = LHS1;
6105 Operand2 = RHS0;
6106 CC = CCR;
6107 } else if (RHS0 == LHS1) {
6108 CommonValue = LHS1;
6109 Operand1 = LHS0;
6110 Operand2 = RHS1;
6111 CC = CCL;
6112 }
6113 }
6114
6115 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6116 // handle it using OR/AND.
6117 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6118 CC = ISD::SETCC_INVALID;
6119 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6120 CC = ISD::SETCC_INVALID;
6121
6122 if (CC != ISD::SETCC_INVALID) {
6123 unsigned NewOpcode = ISD::DELETED_NODE;
6124 bool IsSigned = isSignedIntSetCC(CC);
6125 if (OpVT.isInteger()) {
6126 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6127 CC == ISD::SETLT || CC == ISD::SETULT);
6128 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6129 if (IsLess == IsOr)
6130 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6131 else
6132 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6133 } else if (OpVT.isFloatingPoint())
6134 NewOpcode =
6135 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6136 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6137
6138 if (NewOpcode != ISD::DELETED_NODE) {
6139 SDValue MinMaxValue =
6140 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6141 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6142 }
6143 }
6144 }
6145
6146 if (TargetPreference == AndOrSETCCFoldKind::None)
6147 return SDValue();
6148
6149 if (CCL == CCR &&
6150 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6151 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6152 const APInt &APLhs = LHS1C->getAPIntValue();
6153 const APInt &APRhs = RHS1C->getAPIntValue();
6154
6155 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6156 // case this is just a compare).
6157 if (APLhs == (-APRhs) &&
6158 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6159 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6160 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6161 // (icmp eq A, C) | (icmp eq A, -C)
6162 // -> (icmp eq Abs(A), C)
6163 // (icmp ne A, C) & (icmp ne A, -C)
6164 // -> (icmp ne Abs(A), C)
6165 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6166 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6167 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6168 } else if (TargetPreference &
6169 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6170
6171 // AndOrSETCCFoldKind::AddAnd:
6172 // A == C0 | A == C1
6173 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6174 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6175 // A != C0 & A != C1
6176 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6177 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6178
6179 // AndOrSETCCFoldKind::NotAnd:
6180 // A == C0 | A == C1
6181 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6182 // -> ~A & smin(C0, C1) == 0
6183 // A != C0 & A != C1
6184 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6185 // -> ~A & smin(C0, C1) != 0
6186
6187 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6188 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6189 APInt Dif = MaxC - MinC;
6190 if (!Dif.isZero() && Dif.isPowerOf2()) {
6191 if (MaxC.isAllOnes() &&
6192 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6193 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6194 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6195 DAG.getConstant(MinC, DL, OpVT));
6196 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6197 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6198 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6199
6200 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6201 DAG.getConstant(-MinC, DL, OpVT));
6202 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6203 DAG.getConstant(~Dif, DL, OpVT));
6204 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6205 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6206 }
6207 }
6208 }
6209 }
6210
6211 return SDValue();
6212}
6213
6214// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6215// We canonicalize to the `select` form in the middle end, but the `and` form
6216 // gets better codegen on all tested targets (arm, x86, riscv).
6217 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6218 const SDLoc &DL, SelectionDAG &DAG) {
6219 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6220 if (!isNullConstant(F))
6221 return SDValue();
6222
6223 EVT CondVT = Cond.getValueType();
6224 if (TLI.getBooleanContents(CondVT) !=
6225 TargetLowering::ZeroOrOneBooleanContent)
6226 return SDValue();
6227
6228 if (T.getOpcode() != ISD::AND)
6229 return SDValue();
6230
6231 if (!isOneConstant(T.getOperand(1)))
6232 return SDValue();
6233
6234 EVT OpVT = T.getValueType();
6235
6236 SDValue CondMask =
6237 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6238 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6239}
6240
6241/// This contains all DAGCombine rules which reduce two values combined by
6242/// an And operation to a single value. This makes them reusable in the context
6243/// of visitSELECT(). Rules involving constants are not included as
6244/// visitSELECT() already handles those cases.
6245SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6246 EVT VT = N1.getValueType();
6247 SDLoc DL(N);
6248
6249 // fold (and x, undef) -> 0
6250 if (N0.isUndef() || N1.isUndef())
6251 return DAG.getConstant(0, DL, VT);
6252
6253 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6254 return V;
6255
6256 // Canonicalize:
6257 // and(x, add) -> and(add, x)
6258 if (N1.getOpcode() == ISD::ADD)
6259 std::swap(N0, N1);
6260
6261 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6262 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6263 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6264 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6265 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6266 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6267 // immediate for an add, but it is legal if its top c2 bits are set,
6268 // transform the ADD so the immediate doesn't need to be materialized
6269 // in a register.
6270 APInt ADDC = ADDI->getAPIntValue();
6271 APInt SRLC = SRLI->getAPIntValue();
6272 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6273 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6274 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6275 SRLC.getZExtValue());
6276 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6277 ADDC |= Mask;
6278 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6279 SDLoc DL0(N0);
6280 SDValue NewAdd =
6281 DAG.getNode(ISD::ADD, DL0, VT,
6282 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6283 CombineTo(N0.getNode(), NewAdd);
6284 // Return N so it doesn't get rechecked!
6285 return SDValue(N, 0);
6286 }
6287 }
6288 }
6289 }
6290 }
6291 }
6292
6293 return SDValue();
6294}
6295
6296bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6297 EVT LoadResultTy, EVT &ExtVT) {
6298 if (!AndC->getAPIntValue().isMask())
6299 return false;
6300
6301 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6302
6303 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6304 EVT LoadedVT = LoadN->getMemoryVT();
6305
6306 if (ExtVT == LoadedVT &&
6307 (!LegalOperations ||
6308 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6309 // ZEXTLOAD will match without needing to change the size of the value being
6310 // loaded.
6311 return true;
6312 }
6313
6314 // Do not change the width of a volatile or atomic load.
6315 if (!LoadN->isSimple())
6316 return false;
6317
6318 // Do not generate loads of non-round integer types since these can
6319 // be expensive (and would be wrong if the type is not byte sized).
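// (Clarifying note, not in the original source: isRound() holds for
// power-of-two sizes of at least one byte, so e.g. i8/i16/i32 pass but an
// i24 ExtVT does not.)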
6320 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6321 return false;
6322
6323 if (LegalOperations &&
6324 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6325 return false;
6326
6327 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6328 return false;
6329
6330 return true;
6331}
6332
6333bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6334 ISD::LoadExtType ExtType, EVT &MemVT,
6335 unsigned ShAmt) {
6336 if (!LDST)
6337 return false;
6338 // Only allow byte offsets.
6339 if (ShAmt % 8)
6340 return false;
6341
6342 // Do not generate loads of non-round integer types since these can
6343 // be expensive (and would be wrong if the type is not byte sized).
6344 if (!MemVT.isRound())
6345 return false;
6346
6347 // Don't change the width of a volatile or atomic load.
6348 if (!LDST->isSimple())
6349 return false;
6350
6351 EVT LdStMemVT = LDST->getMemoryVT();
6352
6353 // Bail out when changing the scalable property, since we can't be sure that
6354 // we're actually narrowing here.
6355 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6356 return false;
6357
6358 // Verify that we are actually reducing a load width here.
6359 if (LdStMemVT.bitsLT(MemVT))
6360 return false;
6361
6362 // Ensure that this isn't going to produce an unsupported memory access.
6363 if (ShAmt) {
6364 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6365 const unsigned ByteShAmt = ShAmt / 8;
6366 const Align LDSTAlign = LDST->getAlign();
6367 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6368 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6369 LDST->getAddressSpace(), NarrowAlign,
6370 LDST->getMemOperand()->getFlags()))
6371 return false;
6372 }
6373
6374 // It's not possible to generate a constant of extended or untyped type.
6375 EVT PtrType = LDST->getBasePtr().getValueType();
6376 if (PtrType == MVT::Untyped || PtrType.isExtended())
6377 return false;
6378
6379 if (isa<LoadSDNode>(LDST)) {
6380 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6381 // Don't transform one with multiple uses, this would require adding a new
6382 // load.
6383 if (!SDValue(Load, 0).hasOneUse())
6384 return false;
6385
6386 if (LegalOperations &&
6387 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6388 return false;
6389
6390 // For the transform to be legal, the load must produce only two values
6391 // (the value loaded and the chain). Don't transform a pre-increment
6392 // load, for example, which produces an extra value. Otherwise the
6393 // transformation is not equivalent, and the downstream logic to replace
6394 // uses gets things wrong.
6395 if (Load->getNumValues() > 2)
6396 return false;
6397
6398 // If the load that we're shrinking is an extload and we're not just
6399 // discarding the extension we can't simply shrink the load. Bail.
6400 // TODO: It would be possible to merge the extensions in some cases.
6401 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6402 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6403 return false;
6404
6405 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6406 return false;
6407 } else {
6408 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6409 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6410 // Can't write outside the original store
6411 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6412 return false;
6413
6414 if (LegalOperations &&
6415 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6416 return false;
6417 }
6418 return true;
6419}
6420
6421bool DAGCombiner::SearchForAndLoads(SDNode *N,
6422 SmallVectorImpl<LoadSDNode*> &Loads,
6423 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6424 ConstantSDNode *Mask,
6425 SDNode *&NodeToMask) {
6426 // Recursively search for the operands, looking for loads which can be
6427 // narrowed.
6428 for (SDValue Op : N->op_values()) {
6429 if (Op.getValueType().isVector())
6430 return false;
6431
6432 // Some constants may need fixing up later if they are too large.
6433 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6434 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6435 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6436 NodesWithConsts.insert(N);
6437 continue;
6438 }
6439
6440 if (!Op.hasOneUse())
6441 return false;
6442
6443 switch(Op.getOpcode()) {
6444 case ISD::LOAD: {
6445 auto *Load = cast<LoadSDNode>(Op);
6446 EVT ExtVT;
6447 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6448 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6449
6450 // ZEXTLOAD is already small enough.
6451 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6452 ExtVT.bitsGE(Load->getMemoryVT()))
6453 continue;
6454
6455 // Use LE to convert equal sized loads to zext.
6456 if (ExtVT.bitsLE(Load->getMemoryVT()))
6457 Loads.push_back(Load);
6458
6459 continue;
6460 }
6461 return false;
6462 }
6463 case ISD::ZERO_EXTEND:
6464 case ISD::AssertZext: {
6465 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6466 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6467 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6468 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6469 Op.getOperand(0).getValueType();
6470
6471 // We can accept extending nodes if the mask is wider or an equal
6472 // width to the original type.
6473 if (ExtVT.bitsGE(VT))
6474 continue;
6475 break;
6476 }
6477 case ISD::OR:
6478 case ISD::XOR:
6479 case ISD::AND:
6480 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6481 NodeToMask))
6482 return false;
6483 continue;
6484 }
6485
6486 // Allow one node which will be masked along with any loads found.
6487 if (NodeToMask)
6488 return false;
6489
6490 // Also ensure that the node to be masked only produces one data result.
6491 NodeToMask = Op.getNode();
6492 if (NodeToMask->getNumValues() > 1) {
6493 bool HasValue = false;
6494 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6495 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6496 if (VT != MVT::Glue && VT != MVT::Other) {
6497 if (HasValue) {
6498 NodeToMask = nullptr;
6499 return false;
6500 }
6501 HasValue = true;
6502 }
6503 }
6504 assert(HasValue && "Node to be masked has no data result?");
6505 }
6506 }
6507 return true;
6508}
6509
6510bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6511 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6512 if (!Mask)
6513 return false;
6514
6515 if (!Mask->getAPIntValue().isMask())
6516 return false;
6517
6518 // No need to do anything if the and directly uses a load.
6519 if (isa<LoadSDNode>(N->getOperand(0)))
6520 return false;
6521
6522 SmallVector<LoadSDNode*, 8> Loads;
6523 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6524 SDNode *FixupNode = nullptr;
6525 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6526 if (Loads.empty())
6527 return false;
6528
6529 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6530 SDValue MaskOp = N->getOperand(1);
6531
6532 // If it exists, fixup the single node we allow in the tree that needs
6533 // masking.
6534 if (FixupNode) {
6535 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6536 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6537 FixupNode->getValueType(0),
6538 SDValue(FixupNode, 0), MaskOp);
6539 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6540 if (And.getOpcode() == ISD::AND)
6541 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6542 }
6543
6544 // Narrow any constants that need it.
6545 for (auto *LogicN : NodesWithConsts) {
6546 SDValue Op0 = LogicN->getOperand(0);
6547 SDValue Op1 = LogicN->getOperand(1);
6548
6549 if (isa<ConstantSDNode>(Op0))
6550 Op0 =
6551 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6552
6553 if (isa<ConstantSDNode>(Op1))
6554 Op1 =
6555 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6556
6557 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6558 std::swap(Op0, Op1);
6559
6560 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6561 }
6562
6563 // Create narrow loads.
6564 for (auto *Load : Loads) {
6565 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6566 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6567 SDValue(Load, 0), MaskOp);
6568 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6569 if (And.getOpcode() == ISD::AND)
6570 And = SDValue(
6571 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6572 SDValue NewLoad = reduceLoadWidth(And.getNode());
6573 assert(NewLoad &&
6574 "Shouldn't be masking the load if it can't be narrowed");
6575 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6576 }
6577 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6578 return true;
6579 }
6580 return false;
6581}
6582
6583// Unfold
6584// x & (-1 'logical shift' y)
6585// To
6586// (x 'opposite logical shift' y) 'logical shift' y
6587// if it is better for performance.
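// Illustrative example (not in the original source): for the SRL mask form,
//   x & (-1 >> y) --> (x << y) >> y
// both sides clear the top y bits of x and leave the low bits in place, so
// the all-ones mask never has to be materialized.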
6588SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6589 assert(N->getOpcode() == ISD::AND);
6590
6591 SDValue N0 = N->getOperand(0);
6592 SDValue N1 = N->getOperand(1);
6593
6594 // Do we actually prefer shifts over mask?
6595 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6596 return SDValue();
6597
6598 // Try to match (-1 '[outer] logical shift' y)
6599 unsigned OuterShift;
6600 unsigned InnerShift; // The opposite direction to the OuterShift.
6601 SDValue Y; // Shift amount.
6602 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6603 if (!M.hasOneUse())
6604 return false;
6605 OuterShift = M->getOpcode();
6606 if (OuterShift == ISD::SHL)
6607 InnerShift = ISD::SRL;
6608 else if (OuterShift == ISD::SRL)
6609 InnerShift = ISD::SHL;
6610 else
6611 return false;
6612 if (!isAllOnesConstant(M->getOperand(0)))
6613 return false;
6614 Y = M->getOperand(1);
6615 return true;
6616 };
6617
6618 SDValue X;
6619 if (matchMask(N1))
6620 X = N0;
6621 else if (matchMask(N0))
6622 X = N1;
6623 else
6624 return SDValue();
6625
6626 SDLoc DL(N);
6627 EVT VT = N->getValueType(0);
6628
6629 // tmp = x 'opposite logical shift' y
6630 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6631 // ret = tmp 'logical shift' y
6632 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6633
6634 return T1;
6635}
6636
6637/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6638/// For a target with a bit test, this is expected to become test + set and save
6639/// at least 1 instruction.
6640 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6641 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6642
6643 // Look through an optional extension.
6644 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6645 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6646 And0 = And0.getOperand(0);
6647 if (!isOneConstant(And1) || !And0.hasOneUse())
6648 return SDValue();
6649
6650 SDValue Src = And0;
6651
6652 // Attempt to find a 'not' op.
6653 // TODO: Should we favor test+set even without the 'not' op?
6654 bool FoundNot = false;
6655 if (isBitwiseNot(Src)) {
6656 FoundNot = true;
6657 Src = Src.getOperand(0);
6658
6659 // Look through an optional truncation. The source operand may not be the
6660 // same type as the original 'and', but that is ok because we are masking
6661 // off everything but the low bit.
6662 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6663 Src = Src.getOperand(0);
6664 }
6665
6666 // Match a shift-right by constant.
6667 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6668 return SDValue();
6669
6670 // This is probably not worthwhile without a supported type.
6671 EVT SrcVT = Src.getValueType();
6672 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6673 if (!TLI.isTypeLegal(SrcVT))
6674 return SDValue();
6675
6676 // We might have looked through casts that make this transform invalid.
6677 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6678 SDValue ShiftAmt = Src.getOperand(1);
6679 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6680 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6681 return SDValue();
6682
6683 // Set source to shift source.
6684 Src = Src.getOperand(0);
6685
6686 // Try again to find a 'not' op.
6687 // TODO: Should we favor test+set even with two 'not' ops?
6688 if (!FoundNot) {
6689 if (!isBitwiseNot(Src))
6690 return SDValue();
6691 Src = Src.getOperand(0);
6692 }
6693
6694 if (!TLI.hasBitTest(Src, ShiftAmt))
6695 return SDValue();
6696
6697 // Turn this into a bit-test pattern using mask op + setcc:
6698 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6699 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6700 SDLoc DL(And);
6701 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6702 EVT CCVT =
6703 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6704 SDValue Mask = DAG.getConstant(
6705 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6706 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6707 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6708 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6709 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6710}
6711
6712/// For targets that support usubsat, match a bit-hack form of that operation
6713/// that ends in 'and' and convert it.
6714 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6715 EVT VT = N->getValueType(0);
6716 unsigned BitWidth = VT.getScalarSizeInBits();
6717 APInt SignMask = APInt::getSignMask(BitWidth);
6718
6719 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6720 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6721 // xor/add with SMIN (signmask) are logically equivalent.
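// Worked example (illustrative, not in the original source): for i8 X = 200,
// X ^ 128 = 72 and X s>> 7 = 0xFF, so the AND yields 72 == usubsat(200, 128);
// for X = 100, X s>> 7 = 0 and the AND yields 0 == usubsat(100, 128).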
6722 SDValue X;
6723 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6724 m_OneUse(m_Sra(m_Deferred(X),
6725 m_SpecificInt(BitWidth - 1))))) &&
6726 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6727 m_OneUse(m_Sra(m_Deferred(X),
6728 m_SpecificInt(BitWidth - 1))))))
6729 return SDValue();
6730
6731 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6732 DAG.getConstant(SignMask, DL, VT));
6733}
6734
6735/// Given a bitwise logic operation N with a matching bitwise logic operand,
6736/// fold a pattern where 2 of the source operands are identically shifted
6737/// values. For example:
6738/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6739 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6740 SelectionDAG &DAG) {
6741 unsigned LogicOpcode = N->getOpcode();
6742 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6743 "Expected bitwise logic operation");
6744
6745 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6746 return SDValue();
6747
6748 // Match another bitwise logic op and a shift.
6749 unsigned ShiftOpcode = ShiftOp.getOpcode();
6750 if (LogicOp.getOpcode() != LogicOpcode ||
6751 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6752 ShiftOpcode == ISD::SRA))
6753 return SDValue();
6754
6755 // Match another shift op inside the first logic operand. Handle both commuted
6756 // possibilities.
6757 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6758 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6759 SDValue X1 = ShiftOp.getOperand(0);
6760 SDValue Y = ShiftOp.getOperand(1);
6761 SDValue X0, Z;
6762 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6763 LogicOp.getOperand(0).getOperand(1) == Y) {
6764 X0 = LogicOp.getOperand(0).getOperand(0);
6765 Z = LogicOp.getOperand(1);
6766 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6767 LogicOp.getOperand(1).getOperand(1) == Y) {
6768 X0 = LogicOp.getOperand(1).getOperand(0);
6769 Z = LogicOp.getOperand(0);
6770 } else {
6771 return SDValue();
6772 }
6773
6774 EVT VT = N->getValueType(0);
6775 SDLoc DL(N);
6776 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6777 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6778 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6779}
6780
6781/// Given a tree of logic operations with shape like
6782/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6783/// try to match and fold shift operations with the same shift amount.
6784/// For example:
6785/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6786/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6787 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6788 SDValue RightHand, SelectionDAG &DAG) {
6789 unsigned LogicOpcode = N->getOpcode();
6790 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6791 "Expected bitwise logic operation");
6792 if (LeftHand.getOpcode() != LogicOpcode ||
6793 RightHand.getOpcode() != LogicOpcode)
6794 return SDValue();
6795 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6796 return SDValue();
6797
6798 // Try to match one of following patterns:
6799 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6800 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6801 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6802 // itself.
6803 SDValue CombinedShifts, W;
6804 SDValue R0 = RightHand.getOperand(0);
6805 SDValue R1 = RightHand.getOperand(1);
6806 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6807 W = R1;
6808 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6809 W = R0;
6810 else
6811 return SDValue();
6812
6813 EVT VT = N->getValueType(0);
6814 SDLoc DL(N);
6815 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6816}
6817
6818SDValue DAGCombiner::visitAND(SDNode *N) {
6819 SDValue N0 = N->getOperand(0);
6820 SDValue N1 = N->getOperand(1);
6821 EVT VT = N1.getValueType();
6822 SDLoc DL(N);
6823
6824 // x & x --> x
6825 if (N0 == N1)
6826 return N0;
6827
6828 // fold (and c1, c2) -> c1&c2
6829 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6830 return C;
6831
6832 // canonicalize constant to RHS
6833 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6834 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6835 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6836
6837 if (areBitwiseNotOfEachother(N0, N1))
6838 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6839
6840 // fold vector ops
6841 if (VT.isVector()) {
6842 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6843 return FoldedVOp;
6844
6845 // fold (and x, 0) -> 0, vector edition
6846 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6847 // do not return N1, because an undef node may exist in N1
6848 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6849 N1.getValueType());
6850
6851 // fold (and x, -1) -> x, vector edition
6852 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6853 return N0;
6854
6855 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6856 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6857 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6858 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6859 N1.hasOneUse()) {
6860 EVT LoadVT = MLoad->getMemoryVT();
6861 EVT ExtVT = VT;
6862 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6863 // For this AND to be a zero extension of the masked load, the elements
6864 // of the BuildVec must mask the bottom bits of the extended element
6865 // type.
6866 uint64_t ElementSize =
6867 LoadVT.getVectorElementType().getScalarSizeInBits();
6868 if (Splat->getAPIntValue().isMask(ElementSize)) {
6869 SDValue NewLoad = DAG.getMaskedLoad(
6870 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6871 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6872 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6873 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6874 bool LoadHasOtherUsers = !N0.hasOneUse();
6875 CombineTo(N, NewLoad);
6876 if (LoadHasOtherUsers)
6877 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6878 return SDValue(N, 0);
6879 }
6880 }
6881 }
6882 }
6883
6884 // fold (and x, -1) -> x
6885 if (isAllOnesConstant(N1))
6886 return N0;
6887
6888 // if (and x, c) is known to be zero, return 0
6889 unsigned BitWidth = VT.getScalarSizeInBits();
6890 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6891 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6892 return DAG.getConstant(0, DL, VT);
6893
6894 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6895 return R;
6896
6897 if (SDValue NewSel = foldBinOpIntoSelect(N))
6898 return NewSel;
6899
6900 // reassociate and
6901 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
6902 return RAND;
6903
6904 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
6905 if (SDValue SD =
6906 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
6907 return SD;
6908
6909 // fold (and (or x, C), D) -> D if (C & D) == D
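// e.g. (and (or x, 0xF0), 0x30) -> 0x30, since the OR forces every bit of
// 0x30 to one. (Illustrative example, not in the original source.)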
6910 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6911 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6912 };
6913 if (N0.getOpcode() == ISD::OR &&
6914 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6915 return N1;
6916
6917 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6918 SDValue N0Op0 = N0.getOperand(0);
6919 EVT SrcVT = N0Op0.getValueType();
6920 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
6921 APInt Mask = ~N1C->getAPIntValue();
6922 Mask = Mask.trunc(SrcBitWidth);
6923
6924 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6925 if (DAG.MaskedValueIsZero(N0Op0, Mask))
6926 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
6927
6928 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
6929 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
6930 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
6931 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
6932 TLI.isNarrowingProfitable(VT, SrcVT))
6933 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
6934 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
6935 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
6936 }
6937
6938 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
6939 if (ISD::isExtOpcode(N0.getOpcode())) {
6940 unsigned ExtOpc = N0.getOpcode();
6941 SDValue N0Op0 = N0.getOperand(0);
6942 if (N0Op0.getOpcode() == ISD::AND &&
6943 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
6944 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
6945 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
6946 N0->hasOneUse() && N0Op0->hasOneUse()) {
6947 SDValue NewMask =
6948 DAG.getNode(ISD::AND, DL, VT, N1,
6949 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
6950 return DAG.getNode(ISD::AND, DL, VT,
6951 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
6952 NewMask);
6953 }
6954 }
6955
6956 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6957 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6958 // already be zero by virtue of the width of the base type of the load.
6959 //
6960 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6961 // more cases.
6962 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6964 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6965 N0.getOperand(0).getResNo() == 0) ||
6966 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6967 auto *Load =
6968 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
6969
6970 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6971 // This can be a pure constant or a vector splat, in which case we treat the
6972 // vector as a scalar and use the splat value.
6973 APInt Constant = APInt::getZero(1);
6974 if (const ConstantSDNode *C = isConstOrConstSplat(
6975 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6976 Constant = C->getAPIntValue();
6977 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6978 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6979 APInt SplatValue, SplatUndef;
6980 unsigned SplatBitSize;
6981 bool HasAnyUndefs;
6982 // Endianness should not matter here. Code below makes sure that we only
6983 // use the result if the SplatBitSize is a multiple of the vector element
6984 // size. And after that we AND all element sized parts of the splat
6985 // together. So the end result should be the same regardless of in which
6986 // order we do those operations.
6987 const bool IsBigEndian = false;
6988 bool IsSplat =
6989 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
6990 HasAnyUndefs, EltBitWidth, IsBigEndian);
6991
6992 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6993 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
6994 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
6995 // Undef bits can contribute to a possible optimisation if set, so
6996 // set them.
6997 SplatValue |= SplatUndef;
6998
6999 // The splat value may be something like "0x00FFFFFF", which means 0 for
7000 // the first vector value and FF for the rest, repeating. We need a mask
7001 // that will apply equally to all members of the vector, so AND all the
7002 // lanes of the constant together.
7003 Constant = APInt::getAllOnes(EltBitWidth);
7004 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7005 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7006 }
7007 }
7008
7009 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7010 // actually legal and isn't going to get expanded, else this is a false
7011 // optimisation.
7012 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7013 Load->getValueType(0),
7014 Load->getMemoryVT());
7015
7016 // Resize the constant to the same size as the original memory access before
7017 // extension. If it is still the AllOnesValue then this AND is completely
7018 // unneeded.
7019 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7020
7021 bool B;
7022 switch (Load->getExtensionType()) {
7023 default: B = false; break;
7024 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7025 case ISD::ZEXTLOAD:
7026 case ISD::NON_EXTLOAD: B = true; break;
7027 }
7028
7029 if (B && Constant.isAllOnes()) {
7030 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7031 // preserve semantics once we get rid of the AND.
7032 SDValue NewLoad(Load, 0);
7033
7034 // Fold the AND away. NewLoad may get replaced immediately.
7035 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7036
7037 if (Load->getExtensionType() == ISD::EXTLOAD) {
7038 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7039 Load->getValueType(0), SDLoc(Load),
7040 Load->getChain(), Load->getBasePtr(),
7041 Load->getOffset(), Load->getMemoryVT(),
7042 Load->getMemOperand());
7043 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7044 if (Load->getNumValues() == 3) {
7045 // PRE/POST_INC loads have 3 values.
7046 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7047 NewLoad.getValue(2) };
7048 CombineTo(Load, To, 3, true);
7049 } else {
7050 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7051 }
7052 }
7053
7054 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7055 }
7056 }
7057
7058 // Try to convert a constant mask AND into a shuffle clear mask.
7059 if (VT.isVector())
7060 if (SDValue Shuffle = XformToShuffleWithZero(N))
7061 return Shuffle;
7062
7063 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7064 return Combined;
7065
7066 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7067 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7068 SDValue Ext = N0.getOperand(0);
7069 EVT ExtVT = Ext->getValueType(0);
7070 SDValue Extendee = Ext->getOperand(0);
7071
7072 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7073 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7074 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7075 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7076 // => (extract_subvector (iN_zeroext v))
7077 SDValue ZeroExtExtendee =
7078 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7079
7080 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7081 N0.getOperand(1));
7082 }
7083 }
7084
7085 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7086 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7087 EVT MemVT = GN0->getMemoryVT();
7088 EVT ScalarVT = MemVT.getScalarType();
7089
7090 if (SDValue(GN0, 0).hasOneUse() &&
7091 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7092 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
7093 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7094 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7095
7096 SDValue ZExtLoad = DAG.getMaskedGather(
7097 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7098 GN0->getIndexType(), ISD::ZEXTLOAD);
7099
7100 CombineTo(N, ZExtLoad);
7101 AddToWorklist(ZExtLoad.getNode());
7102 // Avoid recheck of N.
7103 return SDValue(N, 0);
7104 }
7105 }
7106
7107 // fold (and (load x), 255) -> (zextload x, i8)
7108 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7109 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7110 if (SDValue Res = reduceLoadWidth(N))
7111 return Res;
7112
7113 if (LegalTypes) {
7114 // Attempt to propagate the AND back up to the leaves which, if they're
7115 // loads, can be combined to narrow loads and the AND node can be removed.
7116 // Perform after legalization so that extend nodes will already be
7117 // combined into the loads.
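// e.g. (and (or (load i32 p), (load i32 q)), 0xFF) can, target permitting,
// become an OR of two zero-extending i8 loads with the AND removed entirely.
// (Illustrative example, not in the original source.)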
7118 if (BackwardsPropagateMask(N))
7119 return SDValue(N, 0);
7120 }
7121
7122 if (SDValue Combined = visitANDLike(N0, N1, N))
7123 return Combined;
7124
7125 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7126 if (N0.getOpcode() == N1.getOpcode())
7127 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7128 return V;
7129
7130 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7131 return R;
7132 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7133 return R;
7134
7135 // Masking the negated extension of a boolean is just the zero-extended
7136 // boolean:
7137 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7138 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7139 //
7140 // Note: the SimplifyDemandedBits fold below can make an information-losing
7141 // transform, and then we have no way to find this better fold.
7142 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7143 if (isNullOrNullSplat(N0.getOperand(0))) {
7144 SDValue SubRHS = N0.getOperand(1);
7145 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7146 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7147 return SubRHS;
7148 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7149 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7150 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7151 }
7152 }
7153
7154 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7155 // fold (and (sra)) -> (and (srl)) when possible.
7156 if (SimplifyDemandedBits(SDValue(N, 0)))
7157 return SDValue(N, 0);
7158
7159 // fold (zext_inreg (extload x)) -> (zextload x)
7160 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7161 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7162 (ISD::isEXTLoad(N0.getNode()) ||
7163 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7164 auto *LN0 = cast<LoadSDNode>(N0);
7165 EVT MemVT = LN0->getMemoryVT();
7166 // If we zero all the possible extended bits, then we can turn this into
7167 // a zextload if we are running before legalize or the operation is legal.
7168 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7169 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7170 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7171 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7172 ((!LegalOperations && LN0->isSimple()) ||
7173 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7174 SDValue ExtLoad =
7175 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7176 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7177 AddToWorklist(N);
7178 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7179 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7180 }
7181 }
7182
7183 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7184 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7185 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7186 N0.getOperand(1), false))
7187 return BSwap;
7188 }
7189
7190 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7191 return Shifts;
7192
7193 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7194 return V;
7195
7196 // Recognize the following pattern:
7197 //
7198 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7199 //
7200 // where bitmask is a mask that clears the upper bits of AndVT. The
7201 // number of bits in bitmask must be a power of two.
7202 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7203 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7204 return false;
7205
7206 auto *C = dyn_cast<ConstantSDNode>(RHS);
7207 if (!C)
7208 return false;
7209
7210 if (!C->getAPIntValue().isMask(
7211 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7212 return false;
7213
7214 return true;
7215 };
7216
7217 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7218 if (IsAndZeroExtMask(N0, N1))
7219 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7220
7221 if (hasOperation(ISD::USUBSAT, VT))
7222 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7223 return V;
7224
7225 // Postpone until legalization completed to avoid interference with bswap
7226 // folding
7227 if (LegalOperations || VT.isVector())
7228 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7229 return R;
7230
7231 return SDValue();
7232}
7233
7234/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
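/// e.g. for i32 with the upper half masked off: a = 0x0000BBCC gives
/// ((a & 0xFF) << 8) | ((a >> 8) & 0xFF) = 0x0000CCBB = (bswap a) >> 16.
/// (Illustrative example, not part of the original comment.)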
7235SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7236 bool DemandHighBits) {
7237 if (!LegalOperations)
7238 return SDValue();
7239
7240 EVT VT = N->getValueType(0);
7241 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7242 return SDValue();
7243 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7244 return SDValue();
7245
7246 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7247 bool LookPassAnd0 = false;
7248 bool LookPassAnd1 = false;
7249 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7250 std::swap(N0, N1);
7251 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7252 std::swap(N0, N1);
7253 if (N0.getOpcode() == ISD::AND) {
7254 if (!N0->hasOneUse())
7255 return SDValue();
7256 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7257 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7258 // This is needed for X86.
7259 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7260 N01C->getZExtValue() != 0xFFFF))
7261 return SDValue();
7262 N0 = N0.getOperand(0);
7263 LookPassAnd0 = true;
7264 }
7265
7266 if (N1.getOpcode() == ISD::AND) {
7267 if (!N1->hasOneUse())
7268 return SDValue();
7269 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7270 if (!N11C || N11C->getZExtValue() != 0xFF)
7271 return SDValue();
7272 N1 = N1.getOperand(0);
7273 LookPassAnd1 = true;
7274 }
7275
7276 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7277 std::swap(N0, N1);
7278 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7279 return SDValue();
7280 if (!N0->hasOneUse() || !N1->hasOneUse())
7281 return SDValue();
7282
7283 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7284 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7285 if (!N01C || !N11C)
7286 return SDValue();
7287 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7288 return SDValue();
7289
7290 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7291 SDValue N00 = N0->getOperand(0);
7292 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7293 if (!N00->hasOneUse())
7294 return SDValue();
7295 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7296 if (!N001C || N001C->getZExtValue() != 0xFF)
7297 return SDValue();
7298 N00 = N00.getOperand(0);
7299 LookPassAnd0 = true;
7300 }
7301
7302 SDValue N10 = N1->getOperand(0);
7303 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7304 if (!N10->hasOneUse())
7305 return SDValue();
7306 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7307 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7308 // for X86.
7309 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7310 N101C->getZExtValue() != 0xFFFF))
7311 return SDValue();
7312 N10 = N10.getOperand(0);
7313 LookPassAnd1 = true;
7314 }
7315
7316 if (N00 != N10)
7317 return SDValue();
7318
7319 // Make sure everything beyond the low halfword gets set to zero since the SRL
7320 // 16 will clear the top bits.
7321 unsigned OpSizeInBits = VT.getSizeInBits();
7322 if (OpSizeInBits > 16) {
7323 // If the left-shift isn't masked out then the only way this is a bswap is
7324 // if all bits beyond the low 8 are 0. In that case the entire pattern
7325 // reduces to a left shift anyway: leave it for other parts of the combiner.
7326 if (DemandHighBits && !LookPassAnd0)
7327 return SDValue();
7328
7329 // However, if the right shift isn't masked out then it might be because
7330 // it's not needed. See if we can spot that too. If the high bits aren't
7331 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7332 // upper bits to be zero.
7333 if (!LookPassAnd1) {
7334 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7335 if (!DAG.MaskedValueIsZero(N10,
7336 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7337 return SDValue();
7338 }
7339 }
7340
7341 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7342 if (OpSizeInBits > 16) {
7343 SDLoc DL(N);
7344 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7345 DAG.getConstant(OpSizeInBits - 16, DL,
7346 getShiftAmountTy(VT)));
7347 }
7348 return Res;
7349}
7350
7351/// Return true if the specified node is an element that makes up a 32-bit
7352/// packed halfword byteswap.
7353/// ((x & 0x000000ff) << 8) |
7354/// ((x & 0x0000ff00) >> 8) |
7355/// ((x & 0x00ff0000) << 8) |
7356/// ((x & 0xff000000) >> 8)
7357 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7358 if (!N->hasOneUse())
7359 return false;
7360
7361 unsigned Opc = N.getOpcode();
7362 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7363 return false;
7364
7365 SDValue N0 = N.getOperand(0);
7366 unsigned Opc0 = N0.getOpcode();
7367 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7368 return false;
7369
7370 ConstantSDNode *N1C = nullptr;
7371 // SHL or SRL: look upstream for AND mask operand
7372 if (Opc == ISD::AND)
7373 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7374 else if (Opc0 == ISD::AND)
7375 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7376 if (!N1C)
7377 return false;
7378
7379 unsigned MaskByteOffset;
7380 switch (N1C->getZExtValue()) {
7381 default:
7382 return false;
7383 case 0xFF: MaskByteOffset = 0; break;
7384 case 0xFF00: MaskByteOffset = 1; break;
7385 case 0xFFFF:
7386 // In case demanded bits didn't clear the bits that will be shifted out.
7387 // This is needed for X86.
7388 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7389 MaskByteOffset = 1;
7390 break;
7391 }
7392 return false;
7393 case 0xFF0000: MaskByteOffset = 2; break;
7394 case 0xFF000000: MaskByteOffset = 3; break;
7395 }
7396
7397 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7398 if (Opc == ISD::AND) {
7399 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7400 // (x >> 8) & 0xff
7401 // (x >> 8) & 0xff0000
7402 if (Opc0 != ISD::SRL)
7403 return false;
7404 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7405 if (!C || C->getZExtValue() != 8)
7406 return false;
7407 } else {
7408 // (x << 8) & 0xff00
7409 // (x << 8) & 0xff000000
7410 if (Opc0 != ISD::SHL)
7411 return false;
7412 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7413 if (!C || C->getZExtValue() != 8)
7414 return false;
7415 }
7416 } else if (Opc == ISD::SHL) {
7417 // (x & 0xff) << 8
7418 // (x & 0xff0000) << 8
7419 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7420 return false;
7421 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7422 if (!C || C->getZExtValue() != 8)
7423 return false;
7424 } else { // Opc == ISD::SRL
7425 // (x & 0xff00) >> 8
7426 // (x & 0xff000000) >> 8
7427 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7428 return false;
7429 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7430 if (!C || C->getZExtValue() != 8)
7431 return false;
7432 }
7433
7434 if (Parts[MaskByteOffset])
7435 return false;
7436
7437 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7438 return true;
7439}
7440
7441// Match 2 elements of a packed halfword bswap.
7442 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7443 if (N.getOpcode() == ISD::OR)
7444 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7445 isBSwapHWordElement(N.getOperand(1), Parts);
7446
7447 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7448 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7449 if (!C || C->getAPIntValue() != 16)
7450 return false;
7451 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7452 return true;
7453 }
7454
7455 return false;
7456}
7457
7458// Match this pattern:
7459// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7460// And rewrite this to:
7461// (rotr (bswap A), 16)
7462 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7463 SelectionDAG &DAG, SDNode *N, SDValue N0,
7464 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7465 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7466 "MatchBSwapHWordOrAndAnd: expecting i32");
7467 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7468 return SDValue();
7469 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7470 return SDValue();
7471 // TODO: this is too restrictive; lifting this restriction requires more tests
7472 if (!N0->hasOneUse() || !N1->hasOneUse())
7473 return SDValue();
7474 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7475 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7476 if (!Mask0 || !Mask1)
7477 return SDValue();
7478 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7479 Mask1->getAPIntValue() != 0x00ff00ff)
7480 return SDValue();
7481 SDValue Shift0 = N0.getOperand(0);
7482 SDValue Shift1 = N1.getOperand(0);
7483 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7484 return SDValue();
7485 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7486 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7487 if (!ShiftAmt0 || !ShiftAmt1)
7488 return SDValue();
7489 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7490 return SDValue();
7491 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7492 return SDValue();
7493
7494 SDLoc DL(N);
7495 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7496 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7497 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7498}
7499
7500/// Match a 32-bit packed halfword bswap. That is
7501/// ((x & 0x000000ff) << 8) |
7502/// ((x & 0x0000ff00) >> 8) |
7503/// ((x & 0x00ff0000) << 8) |
7504/// ((x & 0xff000000) >> 8)
7505/// => (rotl (bswap x), 16)
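/// e.g. x = 0xAABBCCDD: the four masked terms combine to 0xBBAADDCC, which is
/// rotl(bswap(x) = 0xDDCCBBAA, 16). (Illustrative example, not part of the
/// original comment.)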
7506SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7507 if (!LegalOperations)
7508 return SDValue();
7509
7510 EVT VT = N->getValueType(0);
7511 if (VT != MVT::i32)
7512 return SDValue();
7513 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7514 return SDValue();
7515
7516 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7517 getShiftAmountTy(VT)))
7518 return BSwap;
7519
7520 // Try again with commuted operands.
7521 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7522 getShiftAmountTy(VT)))
7523 return BSwap;
7524
7525
7526 // Look for either
7527 // (or (bswaphpair), (bswaphpair))
7528 // (or (or (bswaphpair), (and)), (and))
7529 // (or (or (and), (bswaphpair)), (and))
7530 SDNode *Parts[4] = {};
7531
7532 if (isBSwapHWordPair(N0, Parts)) {
7533 // (or (or (and), (and)), (or (and), (and)))
7534 if (!isBSwapHWordPair(N1, Parts))
7535 return SDValue();
7536 } else if (N0.getOpcode() == ISD::OR) {
7537 // (or (or (or (and), (and)), (and)), (and))
7538 if (!isBSwapHWordElement(N1, Parts))
7539 return SDValue();
7540 SDValue N00 = N0.getOperand(0);
7541 SDValue N01 = N0.getOperand(1);
7542 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7543 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7544 return SDValue();
7545 } else {
7546 return SDValue();
7547 }
7548
7549 // Make sure the parts are all coming from the same node.
7550 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7551 return SDValue();
7552
7553 SDLoc DL(N);
7554 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7555 SDValue(Parts[0], 0));
7556
7557 // Result of the bswap should be rotated by 16. If it's not legal, then
7558 // do (x << 16) | (x >> 16).
7559 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7560 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7561 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7562 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7563 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7564 return DAG.getNode(ISD::OR, DL, VT,
7565 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7566 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7567}
7568
7569/// This contains all DAGCombine rules which reduce two values combined by
7570/// an Or operation to a single value \see visitANDLike().
7571SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7572 EVT VT = N1.getValueType();
7573
7574 // fold (or x, undef) -> -1
7575 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7576 return DAG.getAllOnesConstant(DL, VT);
7577
7578 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7579 return V;
7580
7581 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7582 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7583 // Don't increase # computations.
7584 (N0->hasOneUse() || N1->hasOneUse())) {
7585 // We can only do this xform if we know that bits from X that are set in C2
7586 // but not in C1 are already zero. Likewise for Y.
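// e.g. (or (and X, 0xFF00), (and Y, 0x00FF)) -> (and (or X, Y), 0xFFFF) when
// the low byte of X and the high byte of Y are known to be zero.
// (Illustrative example, not in the original source.)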
7587 if (const ConstantSDNode *N0O1C =
7588 getAsNonOpaqueConstant(N0.getOperand(1))) {
7589 if (const ConstantSDNode *N1O1C =
7590 getAsNonOpaqueConstant(N1.getOperand(1))) {
7591 // We can only do this xform if we know that bits from X that are set in
7592 // C2 but not in C1 are already zero. Likewise for Y.
7593 const APInt &LHSMask = N0O1C->getAPIntValue();
7594 const APInt &RHSMask = N1O1C->getAPIntValue();
7595
7596 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7597 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7598 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7599 N0.getOperand(0), N1.getOperand(0));
7600 return DAG.getNode(ISD::AND, DL, VT, X,
7601 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7602 }
7603 }
7604 }
7605 }
7606
7607 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7608 if (N0.getOpcode() == ISD::AND &&
7609 N1.getOpcode() == ISD::AND &&
7610 N0.getOperand(0) == N1.getOperand(0) &&
7611 // Don't increase # computations.
7612 (N0->hasOneUse() || N1->hasOneUse())) {
7613 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7614 N0.getOperand(1), N1.getOperand(1));
7615 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7616 }
7617
7618 return SDValue();
7619}
7620
7621/// OR combines for which the commuted variant will be tried as well.
7622 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7623 SDNode *N) {
7624 EVT VT = N0.getValueType();
7625 unsigned BW = VT.getScalarSizeInBits();
7626 SDLoc DL(N);
7627
7628 auto peekThroughResize = [](SDValue V) {
7629 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7630 return V->getOperand(0);
7631 return V;
7632 };
7633
7634 SDValue N0Resized = peekThroughResize(N0);
7635 if (N0Resized.getOpcode() == ISD::AND) {
7636 SDValue N1Resized = peekThroughResize(N1);
7637 SDValue N00 = N0Resized.getOperand(0);
7638 SDValue N01 = N0Resized.getOperand(1);
7639
7640 // fold or (and x, y), x --> x
7641 if (N00 == N1Resized || N01 == N1Resized)
7642 return N1;
7643
7644 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7645 // TODO: Set AllowUndefs = true.
7646 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7647 /* AllowUndefs */ false)) {
7648 if (peekThroughResize(NotOperand) == N1Resized)
7649 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7650 N1);
7651 }
7652
7653 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7654 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7655 /* AllowUndefs */ false)) {
7656 if (peekThroughResize(NotOperand) == N1Resized)
7657 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7658 N1);
7659 }
7660 }
7661
7662 SDValue X, Y;
7663
7664 // fold or (xor X, N1), N1 --> or X, N1
7665 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7666 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7667
7668 // fold or (xor x, y), (x and/or y) --> or x, y
7669 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7670 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7671 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7672 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7673
7674 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7675 return R;
7676
7677 auto peekThroughZext = [](SDValue V) {
7678 if (V->getOpcode() == ISD::ZERO_EXTEND)
7679 return V->getOperand(0);
7680 return V;
7681 };
7682
7683 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7684 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7685 N0.getOperand(0) == N1.getOperand(0) &&
7686 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7687 return N0;
7688
7689 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7690 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7691 N0.getOperand(1) == N1.getOperand(0) &&
7692 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7693 return N0;
7694
7695 // Attempt to match a legalized build_pair-esque pattern:
7696 // or(shl(aext(Hi),BW/2),zext(Lo))
7697 SDValue Lo, Hi;
7698 if (sd_match(N0,
7699 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7700 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7701 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7702 Lo.getValueType() == Hi.getValueType()) {
7703 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7704 SDValue NotLo, NotHi;
7705 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7706 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7707 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7708 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7709 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7710 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7711 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7712 }
7713 }
7714
7715 return SDValue();
7716}
7717
7718SDValue DAGCombiner::visitOR(SDNode *N) {
7719 SDValue N0 = N->getOperand(0);
7720 SDValue N1 = N->getOperand(1);
7721 EVT VT = N1.getValueType();
7722 SDLoc DL(N);
7723
7724 // x | x --> x
7725 if (N0 == N1)
7726 return N0;
7727
7728 // fold (or c1, c2) -> c1|c2
7729 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7730 return C;
7731
7732 // canonicalize constant to RHS
7733 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7734 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7735 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7736
7737 // fold vector ops
7738 if (VT.isVector()) {
7739 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7740 return FoldedVOp;
7741
7742 // fold (or x, 0) -> x, vector edition
7743 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7744 return N0;
7745
7746 // fold (or x, -1) -> -1, vector edition
7747 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7748 // do not return N1, because an undef node may exist in N1
7749 return DAG.getAllOnesConstant(DL, N1.getValueType());
7750
7751 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7752 // Do this only if the resulting type / shuffle is legal.
7753 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7754 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7755 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7756 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7757 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7758 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7759 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7760 // Ensure both shuffles have a zero input.
7761 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7762 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7763 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7764 bool CanFold = true;
7765 int NumElts = VT.getVectorNumElements();
7766 SmallVector<int, 4> Mask(NumElts, -1);
7767
7768 for (int i = 0; i != NumElts; ++i) {
7769 int M0 = SV0->getMaskElt(i);
7770 int M1 = SV1->getMaskElt(i);
7771
7772 // Determine if either index is pointing to a zero vector.
7773 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7774 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7775
7776 // If one element is zero and the other side is undef, keep undef.
7777 // This also handles the case that both are undef.
7778 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7779 continue;
7780
7781 // Make sure only one of the elements is zero.
7782 if (M0Zero == M1Zero) {
7783 CanFold = false;
7784 break;
7785 }
7786
7787 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7788
7789 // We have a zero and non-zero element. If the non-zero came from
7790 // SV0 make the index a LHS index. If it came from SV1, make it
7791 // a RHS index. We need to mod by NumElts because we don't care
7792 // which operand it came from in the original shuffles.
7793 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7794 }
7795
7796 if (CanFold) {
7797 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7798 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7799 SDValue LegalShuffle =
7800 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7801 if (LegalShuffle)
7802 return LegalShuffle;
7803 }
7804 }
7805 }
7806 }
7807
7808 // fold (or x, 0) -> x
7809 if (isNullConstant(N1))
7810 return N0;
7811
7812 // fold (or x, -1) -> -1
7813 if (isAllOnesConstant(N1))
7814 return N1;
7815
7816 if (SDValue NewSel = foldBinOpIntoSelect(N))
7817 return NewSel;
7818
7819 // fold (or x, c) -> c iff (x & ~c) == 0
7820 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7821 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7822 return N1;
7823
7824 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7825 return R;
7826
7827 if (SDValue Combined = visitORLike(N0, N1, DL))
7828 return Combined;
7829
7830 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7831 return Combined;
7832
7833 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7834 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7835 return BSwap;
7836 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7837 return BSwap;
7838
7839 // reassociate or
7840 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7841 return ROR;
7842
7843 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7844 if (SDValue SD =
7845 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7846 return SD;
7847
7848 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7849 // iff (c1 & c2) != 0 or c1/c2 are undef.
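// e.g. (or (and X, 0x0F), 0x03) -> (and (or X, 0x03), 0x0F), which exposes
// the OR-with-constant for further folding. (Illustrative example, not in the
// original source.)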
7850 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7851 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7852 };
7853 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7854 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7855 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7856 {N1, N0.getOperand(1)})) {
7857 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7858 AddToWorklist(IOR.getNode());
7859 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
7860 }
7861 }
7862
7863 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7864 return Combined;
7865 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7866 return Combined;
7867
7868 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7869 if (N0.getOpcode() == N1.getOpcode())
7870 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7871 return V;
7872
7873 // See if this is some rotate idiom.
7874 if (SDValue Rot = MatchRotate(N0, N1, DL))
7875 return Rot;
7876
7877 if (SDValue Load = MatchLoadCombine(N))
7878 return Load;
7879
7880 // Simplify the operands using demanded-bits information.
7881 if (SimplifyDemandedBits(SDValue(N, 0)))
7882 return SDValue(N, 0);
7883
7884 // If OR can be rewritten into ADD, try combines based on ADD.
7885 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7886 DAG.isADDLike(SDValue(N, 0)))
7887 if (SDValue Combined = visitADDLike(N))
7888 return Combined;
7889
7890 // Postpone until legalization completed to avoid interference with bswap
7891 // folding
7892 if (LegalOperations || VT.isVector())
7893 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7894 return R;
7895
7896 return SDValue();
7897}
7898
7899 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7900 SDValue &Mask) {
7901 if (Op.getOpcode() == ISD::AND &&
7902 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7903 Mask = Op.getOperand(1);
7904 return Op.getOperand(0);
7905 }
7906 return Op;
7907}
7908
7909/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7910static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7911 SDValue &Mask) {
7912 Op = stripConstantMask(DAG, Op, Mask);
7913 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7914 Shift = Op;
7915 return true;
7916 }
7917 return false;
7918}
7919
7920/// Helper function for visitOR to extract the needed side of a rotate idiom
7921/// from a shl/srl/mul/udiv. This is meant to handle cases where
7922/// InstCombine merged some outside op with one of the shifts from
7923/// the rotate pattern.
7924/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7925/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7926/// patterns:
7927///
7928/// (or (add v v) (shrl v bitwidth-1)):
7929/// expands (add v v) -> (shl v 1)
7930///
7931/// (or (mul v c0) (shrl (mul v c1) c2)):
7932/// expands (mul v c0) -> (shl (mul v c1) c3)
7933///
7934/// (or (udiv v c0) (shl (udiv v c1) c2)):
7935/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7936///
7937/// (or (shl v c0) (shrl (shl v c1) c2)):
7938/// expands (shl v c0) -> (shl (shl v c1) c3)
7939///
7940/// (or (shrl v c0) (shl (shrl v c1) c2)):
7941/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7942///
7943/// Such that in all cases, c3+c2==bitwidth(op v c1).
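///
/// For example, with i32 v: (or (mul v 24) (shrl (mul v 3) 29)) expands
/// (mul v 24) -> (shl (mul v 3) 3), since 24 == 3 << 3 and 3 + 29 == 32.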
7944 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7945 SDValue ExtractFrom, SDValue &Mask,
7946 const SDLoc &DL) {
7947 assert(OppShift && ExtractFrom && "Empty SDValue");
7948 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
7949 return SDValue();
7950
7951 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7952
7953 // Value and Type of the shift.
7954 SDValue OppShiftLHS = OppShift.getOperand(0);
7955 EVT ShiftedVT = OppShiftLHS.getValueType();
7956
7957 // Amount of the existing shift.
7958 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7959
7960 // (add v v) -> (shl v 1)
7961 // TODO: Should this be a general DAG canonicalization?
7962 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7963 ExtractFrom.getOpcode() == ISD::ADD &&
7964 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7965 ExtractFrom.getOperand(0) == OppShiftLHS &&
7966 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7967 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7968 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7969
7970 // Preconditions:
7971 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7972 //
7973 // Find opcode of the needed shift to be extracted from (op0 v c0).
7974 unsigned Opcode = ISD::DELETED_NODE;
7975 bool IsMulOrDiv = false;
7976 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7977 // opcode or its arithmetic (mul or udiv) variant.
7978 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7979 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7980 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7981 return false;
7982 Opcode = NeededShift;
7983 return true;
7984 };
7985 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7986 // that the needed shift can be extracted from.
7987 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7988 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7989 return SDValue();
7990
7991 // op0 must be the same opcode on both sides, have the same LHS argument,
7992 // and produce the same value type.
7993 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7994 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7995 ShiftedVT != ExtractFrom.getValueType())
7996 return SDValue();
7997
7998 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7999 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8000 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8001 ConstantSDNode *ExtractFromCst =
8002 isConstOrConstSplat(ExtractFrom.getOperand(1));
8003 // TODO: We should be able to handle non-uniform constant vectors for these values
8004 // Check that we have constant values.
8005 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8006 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8007 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8008 return SDValue();
8009
8010 // Compute the shift amount we need to extract to complete the rotate.
8011 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8012 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8013 return SDValue();
8014 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8015 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8016 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8017 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8018 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8019
8020 // Now try extract the needed shift from the ExtractFrom op and see if the
8021 // result matches up with the existing shift's LHS op.
8022 if (IsMulOrDiv) {
8023 // Op to extract from is a mul or udiv by a constant.
8024 // Check:
8025 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8026 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8027 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8028 NeededShiftAmt.getZExtValue());
8029 APInt ResultAmt;
8030 APInt Rem;
8031 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8032 if (Rem != 0 || ResultAmt != OppLHSAmt)
8033 return SDValue();
8034 } else {
8035 // Op to extract from is a shift by a constant.
8036 // Check:
8037 // c2 - (bitwidth(op0 v c0) - c1) == c0
8038 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8039 ExtractFromAmt.getBitWidth()))
8040 return SDValue();
8041 }
8042
8043 // Return the expanded shift op that should allow a rotate to be formed.
8044 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8045 EVT ResVT = ExtractFrom.getValueType();
8046 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8047 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8048}
8049
8050// Return true if we can prove that, whenever Neg and Pos are both in the
8051// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8052// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8053//
8054// (or (shift1 X, Neg), (shift2 X, Pos))
8055//
8056// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8057// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8058// to consider shift amounts with defined behavior.
8059//
8060// The IsRotate flag should be set when the LHS of both shifts is the same.
8061// Otherwise if matching a general funnel shift, it should be clear.
8062static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8063 SelectionDAG &DAG, bool IsRotate) {
8064 const auto &TLI = DAG.getTargetLoweringInfo();
8065 // If EltSize is a power of 2 then:
8066 //
8067 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8068 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8069 //
8070 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8071 // for the stronger condition:
8072 //
8073 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8074 //
8075 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8076 // we can just replace Neg with Neg' for the rest of the function.
8077 //
8078 // In other cases we check for the even stronger condition:
8079 //
8080 // Neg == EltSize - Pos [B]
8081 //
8082 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8083 // behavior if Pos == 0 (and consequently Neg == EltSize).
8084 //
8085 // We could actually use [A] whenever EltSize is a power of 2, but the
8086 // only extra cases that it would match are those uninteresting ones
8087 // where Neg and Pos are never in range at the same time. E.g. for
8088 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8089 // as well as (sub 32, Pos), but:
8090 //
8091 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8092 //
8093 // always invokes undefined behavior for 32-bit X.
8094 //
8095 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8096 // This allows us to peek through any operations that only affect Mask's
8097 // un-demanded bits.
8098 //
8099 // NOTE: We can only do this when matching operations which won't modify the
8100 // least Log2(EltSize) significant bits and not a general funnel shift.
8101 unsigned MaskLoBits = 0;
8102 if (IsRotate && isPowerOf2_64(EltSize)) {
8103 unsigned Bits = Log2_64(EltSize);
8104 unsigned NegBits = Neg.getScalarValueSizeInBits();
8105 if (NegBits >= Bits) {
8106 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8107 if (SDValue Inner =
8107 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8109 Neg = Inner;
8110 MaskLoBits = Bits;
8111 }
8112 }
8113 }
8114
8115 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8116 if (Neg.getOpcode() != ISD::SUB)
8117 return false;
8118 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8119 if (!NegC)
8120 return false;
8121 SDValue NegOp1 = Neg.getOperand(1);
8122
8123 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8124 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8125 // are redundant for the purpose of the equality.
8126 if (MaskLoBits) {
8127 unsigned PosBits = Pos.getScalarValueSizeInBits();
8128 if (PosBits >= MaskLoBits) {
8129 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8130 if (SDValue Inner =
8131 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8132 Pos = Inner;
8133 }
8134 }
8135 }
8136
8137 // The condition we need is now:
8138 //
8139 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8140 //
8141 // If NegOp1 == Pos then we need:
8142 //
8143 // EltSize & Mask == NegC & Mask
8144 //
8145 // (because "x & Mask" is a truncation and distributes through subtraction).
8146 //
8147 // We also need to account for a potential truncation of NegOp1 if the amount
8148 // has already been legalized to a shift amount type.
8149 APInt Width;
8150 if ((Pos == NegOp1) ||
8151 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8152 Width = NegC->getAPIntValue();
8153
8154 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8155 // Then the condition we want to prove becomes:
8156 //
8157 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8158 //
8159 // which, again because "x & Mask" is a truncation, becomes:
8160 //
8161 // NegC & Mask == (EltSize - PosC) & Mask
8162 // EltSize & Mask == (NegC + PosC) & Mask
8163 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8164 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8165 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8166 else
8167 return false;
8168 } else
8169 return false;
8170
8171 // Now we just need to check that EltSize & Mask == Width & Mask.
8172 if (MaskLoBits)
8173 // EltSize & Mask is 0 since Mask is EltSize - 1.
8174 return Width.getLoBits(MaskLoBits) == 0;
8175 return Width == EltSize;
8176}
8177
8178// A subroutine of MatchRotate used once we have found an OR of two opposite
8179// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8180// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8181// former being preferred if supported. InnerPos and InnerNeg are Pos and
8182// Neg with outer conversions stripped away.
8183SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8184 SDValue Neg, SDValue InnerPos,
8185 SDValue InnerNeg, bool HasPos,
8186 unsigned PosOpcode, unsigned NegOpcode,
8187 const SDLoc &DL) {
8188 // fold (or (shl x, (*ext y)),
8189 // (srl x, (*ext (sub 32, y)))) ->
8190 // (rotl x, y) or (rotr x, (sub 32, y))
8191 //
8192 // fold (or (shl x, (*ext (sub 32, y))),
8193 // (srl x, (*ext y))) ->
8194 // (rotr x, y) or (rotl x, (sub 32, y))
8195 EVT VT = Shifted.getValueType();
8196 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8197 /*IsRotate*/ true)) {
8198 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8199 HasPos ? Pos : Neg);
8200 }
8201
8202 return SDValue();
8203}
8204
8205// A subroutine of MatchRotate used once we have found an OR of two opposite
8206// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8207// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8208// former being preferred if supported. InnerPos and InnerNeg are Pos and
8209// Neg with outer conversions stripped away.
8210// TODO: Merge with MatchRotatePosNeg.
8211SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8212 SDValue Neg, SDValue InnerPos,
8213 SDValue InnerNeg, bool HasPos,
8214 unsigned PosOpcode, unsigned NegOpcode,
8215 const SDLoc &DL) {
8216 EVT VT = N0.getValueType();
8217 unsigned EltBits = VT.getScalarSizeInBits();
8218
8219 // fold (or (shl x0, (*ext y)),
8220 // (srl x1, (*ext (sub 32, y)))) ->
8221 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8222 //
8223 // fold (or (shl x0, (*ext (sub 32, y))),
8224 // (srl x1, (*ext y))) ->
8225 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8226 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8227 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8228 HasPos ? Pos : Neg);
8229 }
8230
8231 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8232 // so for now just use the PosOpcode case if its legal.
8233 // TODO: When can we use the NegOpcode case?
8234 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8235 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8236 if (Op.getOpcode() != BinOpc)
8237 return false;
8238 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8239 return Cst && (Cst->getAPIntValue() == Imm);
8240 };
8241
8242 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8243 // -> (fshl x0, x1, y)
8244 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8245 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8246 InnerPos == InnerNeg.getOperand(0) &&
8248 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8249 }
8250
8251 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8252 // -> (fshr x0, x1, y)
8253 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8254 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8255 InnerNeg == InnerPos.getOperand(0) &&
8257 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8258 }
8259
8260 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8261 // -> (fshr x0, x1, y)
8262 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8263 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8264 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8265 InnerNeg == InnerPos.getOperand(0) &&
8267 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8268 }
8269 }
8270
8271 return SDValue();
8272}
8273
8274// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8275// idioms for rotate, and if the target supports rotation instructions, generate
8276// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8277// with different shifted sources.
8278SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8279 EVT VT = LHS.getValueType();
8280
8281 // The target must have at least one rotate/funnel flavor.
8282 // We still try to match rotate by constant pre-legalization.
8283 // TODO: Support pre-legalization funnel-shift by constant.
8284 bool HasROTL = hasOperation(ISD::ROTL, VT);
8285 bool HasROTR = hasOperation(ISD::ROTR, VT);
8286 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8287 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8288
8289 // If the type is going to be promoted and the target has enabled custom
8290 // lowering for rotate, allow matching rotate by non-constants. Only allow
8291 // this for scalar types.
8292 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8293 TargetLowering::TypePromoteInteger) {
8294 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8295 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8296 }
8297
8298 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8299 return SDValue();
8300
8301 // Check for truncated rotate.
8302 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8303 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8304 assert(LHS.getValueType() == RHS.getValueType());
8305 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8306 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8307 }
8308 }
8309
8310 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8311 SDValue LHSShift; // The shift.
8312 SDValue LHSMask; // AND value if any.
8313 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8314
8315 SDValue RHSShift; // The shift.
8316 SDValue RHSMask; // AND value if any.
8317 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8318
8319 // If neither side matched a rotate half, bail
8320 if (!LHSShift && !RHSShift)
8321 return SDValue();
8322
8323 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8324 // side of the rotate, so try to handle that here. In all cases we need to
8325 // pass the matched shift from the opposite side to compute the opcode and
8326 // needed shift amount to extract. We still want to do this if both sides
8327 // matched a rotate half because one half may be a potential overshift that
8328 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8329 // single one).
8330
8331 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8332 if (LHSShift)
8333 if (SDValue NewRHSShift =
8334 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8335 RHSShift = NewRHSShift;
8336 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8337 if (RHSShift)
8338 if (SDValue NewLHSShift =
8339 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8340 LHSShift = NewLHSShift;
8341
8342 // If a side is still missing, nothing else we can do.
8343 if (!RHSShift || !LHSShift)
8344 return SDValue();
8345
8346 // At this point we've matched or extracted a shift op on each side.
8347
8348 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8349 return SDValue(); // Shifts must disagree.
8350
8351 // Canonicalize shl to left side in a shl/srl pair.
8352 if (RHSShift.getOpcode() == ISD::SHL) {
8353 std::swap(LHS, RHS);
8354 std::swap(LHSShift, RHSShift);
8355 std::swap(LHSMask, RHSMask);
8356 }
8357
8358 // Something has gone wrong - we've lost the shl/srl pair - bail.
8359 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8360 return SDValue();
8361
8362 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8363 SDValue LHSShiftArg = LHSShift.getOperand(0);
8364 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8365 SDValue RHSShiftArg = RHSShift.getOperand(0);
8366 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8367
8368 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8369 ConstantSDNode *RHS) {
8370 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8371 };
8372
8373 auto ApplyMasks = [&](SDValue Res) {
8374 // If there is an AND of either shifted operand, apply it to the result.
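// A bit of the result is kept if the mask of the side it came from allows
// it; bits contributed by an unmasked side are always kept.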
8375 if (LHSMask.getNode() || RHSMask.getNode()) {
8376 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8377 SDValue Mask = AllOnes;
8378
8379 if (LHSMask.getNode()) {
8380 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8381 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8382 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8383 }
8384 if (RHSMask.getNode()) {
8385 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8386 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8387 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8388 }
8389
8390 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8391 }
8392
8393 return Res;
8394 };
8395
8396 // TODO: Support pre-legalization funnel-shift by constant.
8397 bool IsRotate = LHSShiftArg == RHSShiftArg;
8398 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8399 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8400 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8401 // Look for a disguised rotate by constant.
8402 // The common shifted operand X may be hidden inside another 'or'.
8403 SDValue X, Y;
8404 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8405 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8406 return false;
8407 if (CommonOp == Or.getOperand(0)) {
8408 X = CommonOp;
8409 Y = Or.getOperand(1);
8410 return true;
8411 }
8412 if (CommonOp == Or.getOperand(1)) {
8413 X = CommonOp;
8414 Y = Or.getOperand(0);
8415 return true;
8416 }
8417 return false;
8418 };
8419
8420 SDValue Res;
8421 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8422 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8423 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8424 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8425 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8426 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8427 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8428 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8429 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8430 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8431 } else {
8432 return SDValue();
8433 }
8434
8435 return ApplyMasks(Res);
8436 }
8437
8438 return SDValue(); // Requires funnel shift support.
8439 }
8440
8441 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8442 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8443 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8444 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8445 // iff C1+C2 == EltSizeInBits
8446 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8447 SDValue Res;
8448 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8449 bool UseROTL = !LegalOperations || HasROTL;
8450 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8451 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8452 } else {
8453 bool UseFSHL = !LegalOperations || HasFSHL;
8454 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8455 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8456 }
8457
8458 return ApplyMasks(Res);
8459 }
8460
8461 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8462 // shift.
8463 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8464 return SDValue();
8465
8466 // If there is a mask here, and we have a variable shift, we can't be sure
8467 // that we're masking out the right stuff.
8468 if (LHSMask.getNode() || RHSMask.getNode())
8469 return SDValue();
8470
8471 // If the shift amount is sign/zext/any-extended just peel it off.
8472 SDValue LExtOp0 = LHSShiftAmt;
8473 SDValue RExtOp0 = RHSShiftAmt;
8474 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8475 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8476 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8477 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8478 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8479 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8480 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8481 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8482 LExtOp0 = LHSShiftAmt.getOperand(0);
8483 RExtOp0 = RHSShiftAmt.getOperand(0);
8484 }
8485
8486 if (IsRotate && (HasROTL || HasROTR)) {
8487 SDValue TryL =
8488 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8489 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8490 if (TryL)
8491 return TryL;
8492
8493 SDValue TryR =
8494 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8495 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8496 if (TryR)
8497 return TryR;
8498 }
8499
8500 SDValue TryL =
8501 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8502 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8503 if (TryL)
8504 return TryL;
8505
8506 SDValue TryR =
8507 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8508 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8509 if (TryR)
8510 return TryR;
8511
8512 return SDValue();
8513}
8514
8515/// Recursively traverses the expression calculating the origin of the requested
8516/// byte of the given value. Returns std::nullopt if the provider can't be
8517/// calculated.
8518///
8519/// For all the values except the root of the expression, we verify that the
8520/// value has exactly one use and if not then return std::nullopt. This way if
8521/// the origin of the byte is returned it's guaranteed that the values which
8522/// contribute to the byte are not used outside of this expression.
8523
8524/// However, there is a special case when dealing with vector loads -- we allow
8525/// more than one use if the load is a vector type. Since the values that
8526/// contribute to the byte ultimately come from the ExtractVectorElements of the
8527/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8528/// because those operations are independent from the pattern to be combined.
8529/// For vector loads, we simply care that the ByteProviders are adjacent
8530/// positions of the same vector, and their index matches the byte that is being
8531/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8532/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8533/// byte position we are trying to provide for the LoadCombine. If these do
8534/// not match, then we can not combine the vector loads. \p Index uses the
8535/// byte position we are trying to provide for and is matched against the
8536/// shl and load size. The \p Index algorithm ensures the requested byte is
8537/// provided for by the pattern, and the pattern does not over provide bytes.
8538///
8539///
8540/// The supported LoadCombine pattern for vector loads is as follows
8541/// or
8542/// / \
8543/// or shl
8544/// / \ |
8545/// or shl zext
8546/// / \ | |
8547/// shl zext zext EVE*
8548/// | | | |
8549/// zext EVE* EVE* LOAD
8550/// | | |
8551/// EVE* LOAD LOAD
8552/// |
8553/// LOAD
8554///
8555/// *ExtractVectorElement
8556 using SDByteProvider = ByteProvider<SDNode *>;
8557
8558static std::optional<SDByteProvider>
8559 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8560 std::optional<uint64_t> VectorIndex,
8561 unsigned StartingIndex = 0) {
8562
8563 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8564 if (Depth == 10)
8565 return std::nullopt;
8566
8567 // Only allow multiple uses if the instruction is a vector load (in which
8568 // case we will use the load for every ExtractVectorElement)
8569 if (Depth && !Op.hasOneUse() &&
8570 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8571 return std::nullopt;
8572
8573 // Fail to combine if we have encountered anything but a LOAD after handling
8574 // an ExtractVectorElement.
8575 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8576 return std::nullopt;
8577
8578 unsigned BitWidth = Op.getValueSizeInBits();
8579 if (BitWidth % 8 != 0)
8580 return std::nullopt;
8581 unsigned ByteWidth = BitWidth / 8;
8582 assert(Index < ByteWidth && "invalid index requested");
8583 (void) ByteWidth;
8584
8585 switch (Op.getOpcode()) {
8586 case ISD::OR: {
8587 auto LHS =
8588 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8589 if (!LHS)
8590 return std::nullopt;
8591 auto RHS =
8592 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8593 if (!RHS)
8594 return std::nullopt;
8595
8596 if (LHS->isConstantZero())
8597 return RHS;
8598 if (RHS->isConstantZero())
8599 return LHS;
8600 return std::nullopt;
8601 }
8602 case ISD::SHL: {
8603 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8604 if (!ShiftOp)
8605 return std::nullopt;
8606
8607 uint64_t BitShift = ShiftOp->getZExtValue();
8608
8609 if (BitShift % 8 != 0)
8610 return std::nullopt;
8611 uint64_t ByteShift = BitShift / 8;
8612
8613 // If we are shifting by an amount greater than the index we are trying to
8614 // provide, then do not provide anything. Otherwise, subtract the shift
8615 // amount (in bytes) from the index.
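// e.g. for (shl x, 16), byte 2 of the result is byte 0 of x, while bytes 0-1
// are known zero.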
8616 return Index < ByteShift
8617 ? SDByteProvider::getConstantZero()
8618 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8619 Depth + 1, VectorIndex, Index);
8620 }
8621 case ISD::ANY_EXTEND:
8622 case ISD::SIGN_EXTEND:
8623 case ISD::ZERO_EXTEND: {
8624 SDValue NarrowOp = Op->getOperand(0);
8625 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8626 if (NarrowBitWidth % 8 != 0)
8627 return std::nullopt;
8628 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8629
8630 if (Index >= NarrowByteWidth)
8631 return Op.getOpcode() == ISD::ZERO_EXTEND
8632 ? std::optional<SDByteProvider>(
8633 SDByteProvider::getConstantZero())
8634 : std::nullopt;
8635 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8636 StartingIndex);
8637 }
8638 case ISD::BSWAP:
8639 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8640 Depth + 1, VectorIndex, StartingIndex);
8641 case ISD::EXTRACT_VECTOR_ELT: {
8642 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8643 if (!OffsetOp)
8644 return std::nullopt;
8645
8646 VectorIndex = OffsetOp->getZExtValue();
8647
8648 SDValue NarrowOp = Op->getOperand(0);
8649 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8650 if (NarrowBitWidth % 8 != 0)
8651 return std::nullopt;
8652 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8653 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8654 // type, leaving the high bits undefined.
8655 if (Index >= NarrowByteWidth)
8656 return std::nullopt;
8657
8658 // Check to see if the position of the element in the vector corresponds
8659 // with the byte we are trying to provide for. In the case of a vector of
8660 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8661 // the element will provide a range of bytes. For example, if we have a
8662 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8663 // 3).
8664 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8665 return std::nullopt;
8666 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8667 return std::nullopt;
8668
8669 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8670 VectorIndex, StartingIndex);
8671 }
8672 case ISD::LOAD: {
8673 auto L = cast<LoadSDNode>(Op.getNode());
8674 if (!L->isSimple() || L->isIndexed())
8675 return std::nullopt;
8676
8677 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8678 if (NarrowBitWidth % 8 != 0)
8679 return std::nullopt;
8680 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8681
8682 // If the width of the load does not reach the byte we are trying to provide for
8683 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8684 // question
8685 if (Index >= NarrowByteWidth)
8686 return L->getExtensionType() == ISD::ZEXTLOAD
8687 ? std::optional<SDByteProvider>(
8688 SDByteProvider::getConstantZero())
8689 : std::nullopt;
8690
8691 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8692 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8693 }
8694 }
8695
8696 return std::nullopt;
8697}
8698
8699static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8700 return i;
8701}
8702
8703static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8704 return BW - i - 1;
8705}
8706
8707 // Check if the byte offsets we are looking at match with either big or
8708// little endian value loaded. Return true for big endian, false for little
8709// endian, and std::nullopt if match failed.
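// e.g. offsets {0, 1, 2, 3} relative to FirstOffset match little endian,
// {3, 2, 1, 0} match big endian, and anything else fails.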
8710static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8711 int64_t FirstOffset) {
8712 // The endian can be decided only when it is 2 bytes at least.
8713 unsigned Width = ByteOffsets.size();
8714 if (Width < 2)
8715 return std::nullopt;
8716
8717 bool BigEndian = true, LittleEndian = true;
8718 for (unsigned i = 0; i < Width; i++) {
8719 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8720 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8721 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8722 if (!BigEndian && !LittleEndian)
8723 return std::nullopt;
8724 }
8725
8726 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8727 "little endian");
8728 return BigEndian;
8729}
8730
8731 static SDValue stripTruncAndExt(SDValue Value) {
8732 switch (Value.getOpcode()) {
8733 case ISD::TRUNCATE:
8734 case ISD::ZERO_EXTEND:
8735 case ISD::SIGN_EXTEND:
8736 case ISD::ANY_EXTEND:
8737 return stripTruncAndExt(Value.getOperand(0));
8738 }
8739 return Value;
8740}
8741
8742/// Match a pattern where a wide type scalar value is stored by several narrow
8743 /// stores. Fold it into a single store or a BSWAP and a store if the target
8744 /// supports it.
8745///
8746/// Assuming little endian target:
8747/// i8 *p = ...
8748/// i32 val = ...
8749/// p[0] = (val >> 0) & 0xFF;
8750/// p[1] = (val >> 8) & 0xFF;
8751/// p[2] = (val >> 16) & 0xFF;
8752/// p[3] = (val >> 24) & 0xFF;
8753/// =>
8754/// *((i32)p) = val;
8755///
8756/// i8 *p = ...
8757/// i32 val = ...
8758/// p[0] = (val >> 24) & 0xFF;
8759/// p[1] = (val >> 16) & 0xFF;
8760/// p[2] = (val >> 8) & 0xFF;
8761/// p[3] = (val >> 0) & 0xFF;
8762/// =>
8763/// *((i32)p) = BSWAP(val);
8764SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8765 // The matching looks for "store (trunc x)" patterns that appear early but are
8766 // likely to be replaced by truncating store nodes during combining.
8767 // TODO: If there is evidence that running this later would help, this
8768 // limitation could be removed. Legality checks may need to be added
8769 // for the created store and optional bswap/rotate.
8770 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8771 return SDValue();
8772
8773 // We only handle merging simple stores of 1-4 bytes.
8774 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8775 EVT MemVT = N->getMemoryVT();
8776 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8777 !N->isSimple() || N->isIndexed())
8778 return SDValue();
8779
8780 // Collect all of the stores in the chain, up to the maximum store width (i64).
8781 SDValue Chain = N->getChain();
8782 SmallVector<StoreSDNode *, 8> Stores = {N};
8783 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8784 unsigned MaxWideNumBits = 64;
8785 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8786 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8787 // All stores must be the same size to ensure that we are writing all of the
8788 // bytes in the wide value.
8789 // This store should have exactly one use as a chain operand for another
8790 // store in the merging set. If there are other chain uses, then the
8791 // transform may not be safe because order of loads/stores outside of this
8792 // set may not be preserved.
8793 // TODO: We could allow multiple sizes by tracking each stored byte.
8794 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8795 Store->isIndexed() || !Store->hasOneUse())
8796 return SDValue();
8797 Stores.push_back(Store);
8798 Chain = Store->getChain();
8799 if (MaxStores < Stores.size())
8800 return SDValue();
8801 }
8802 // There is no reason to continue if we do not have at least a pair of stores.
8803 if (Stores.size() < 2)
8804 return SDValue();
8805
8806 // Handle simple types only.
8807 LLVMContext &Context = *DAG.getContext();
8808 unsigned NumStores = Stores.size();
8809 unsigned WideNumBits = NumStores * NarrowNumBits;
8810 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8811 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8812 return SDValue();
8813
8814 // Check if all bytes of the source value that we are looking at are stored
8815 // to the same base address. Collect offsets from Base address into OffsetMap.
8816 SDValue SourceValue;
8817 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8818 int64_t FirstOffset = INT64_MAX;
8819 StoreSDNode *FirstStore = nullptr;
8820 std::optional<BaseIndexOffset> Base;
8821 for (auto *Store : Stores) {
8822 // All the stores store different parts of the CombinedValue. A truncate is
8823 // required to get the partial value.
8824 SDValue Trunc = Store->getValue();
8825 if (Trunc.getOpcode() != ISD::TRUNCATE)
8826 return SDValue();
8827 // Other than the first/last part, a shift operation is required to get the
8828 // offset.
8829 int64_t Offset = 0;
8830 SDValue WideVal = Trunc.getOperand(0);
8831 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8832 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8833 // The shift amount must be a constant multiple of the narrow type.
8834 // It is translated to the offset address in the wide source value "y".
8835 //
8836 // x = srl y, ShiftAmtC
8837 // i8 z = trunc x
8838 // store z, ...
8839 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8840 if (ShiftAmtC % NarrowNumBits != 0)
8841 return SDValue();
8842
8843 Offset = ShiftAmtC / NarrowNumBits;
8844 WideVal = WideVal.getOperand(0);
8845 }
8846
8847 // Stores must share the same source value with different offsets.
8848 // Truncate and extends should be stripped to get the single source value.
8849 if (!SourceValue)
8850 SourceValue = WideVal;
8851 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8852 return SDValue();
8853 else if (SourceValue.getValueType() != WideVT) {
8854 if (WideVal.getValueType() == WideVT ||
8855 WideVal.getScalarValueSizeInBits() >
8856 SourceValue.getScalarValueSizeInBits())
8857 SourceValue = WideVal;
8858 // Give up if the source value type is smaller than the store size.
8859 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8860 return SDValue();
8861 }
8862
8863 // Stores must share the same base address.
8864 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8865 int64_t ByteOffsetFromBase = 0;
8866 if (!Base)
8867 Base = Ptr;
8868 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8869 return SDValue();
8870
8871 // Remember the first store.
8872 if (ByteOffsetFromBase < FirstOffset) {
8873 FirstStore = Store;
8874 FirstOffset = ByteOffsetFromBase;
8875 }
8876 // Map the offset in the store and the offset in the combined value, and
8877 // early return if it has been set before.
8878 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8879 return SDValue();
8880 OffsetMap[Offset] = ByteOffsetFromBase;
8881 }
8882
8883 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8884 assert(FirstStore && "First store must be set");
8885
8886 // Check that a store of the wide type is both allowed and fast on the target
8887 const DataLayout &Layout = DAG.getDataLayout();
8888 unsigned Fast = 0;
8889 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8890 *FirstStore->getMemOperand(), &Fast);
8891 if (!Allowed || !Fast)
8892 return SDValue();
8893
8894 // Check if the pieces of the value are going to the expected places in memory
8895 // to merge the stores.
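// e.g. for four i8 pieces of a little-endian i32, the store of piece i must
// land at byte FirstOffset + i.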
8896 auto checkOffsets = [&](bool MatchLittleEndian) {
8897 if (MatchLittleEndian) {
8898 for (unsigned i = 0; i != NumStores; ++i)
8899 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8900 return false;
8901 } else { // MatchBigEndian by reversing loop counter.
8902 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8903 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8904 return false;
8905 }
8906 return true;
8907 };
8908
8909 // Check if the offsets line up for the native data layout of this target.
8910 bool NeedBswap = false;
8911 bool NeedRotate = false;
8912 if (!checkOffsets(Layout.isLittleEndian())) {
8913 // Special-case: check if byte offsets line up for the opposite endian.
8914 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8915 NeedBswap = true;
8916 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8917 NeedRotate = true;
8918 else
8919 return SDValue();
8920 }
8921
8922 SDLoc DL(N);
8923 if (WideVT != SourceValue.getValueType()) {
8924 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8925 "Unexpected store value to merge");
8926 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8927 }
8928
8929 // Before legalize we can introduce illegal bswaps/rotates which will be later
8930 // converted to an explicit bswap sequence. This way we end up with a single
8931 // store and byte shuffling instead of several stores and byte shuffling.
8932 if (NeedBswap) {
8933 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8934 } else if (NeedRotate) {
8935 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8936 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8937 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8938 }
8939
8940 SDValue NewStore =
8941 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8942 FirstStore->getPointerInfo(), FirstStore->getAlign());
8943
8944 // Rely on other DAG combine rules to remove the other individual stores.
8945 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8946 return NewStore;
8947}
8948
8949/// Match a pattern where a wide type scalar value is loaded by several narrow
8950/// loads and combined by shifts and ors. Fold it into a single load or a load
8951 /// and a BSWAP if the target supports it.
8952///
8953/// Assuming little endian target:
8954/// i8 *a = ...
8955/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8956/// =>
8957/// i32 val = *((i32)a)
8958///
8959/// i8 *a = ...
8960/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8961/// =>
8962/// i32 val = BSWAP(*((i32)a))
8963///
8964/// TODO: This rule matches complex patterns with OR node roots and doesn't
8965/// interact well with the worklist mechanism. When a part of the pattern is
8966/// updated (e.g. one of the loads) its direct users are put into the worklist,
8967/// but the root node of the pattern which triggers the load combine is not
8968/// necessarily a direct user of the changed node. For example, once the address
8969 /// of the t28 load is reassociated, load combine won't be triggered:
8970/// t25: i32 = add t4, Constant:i32<2>
8971/// t26: i64 = sign_extend t25
8972/// t27: i64 = add t2, t26
8973/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8974/// t29: i32 = zero_extend t28
8975/// t32: i32 = shl t29, Constant:i8<8>
8976/// t33: i32 = or t23, t32
8977/// As a possible fix visitLoad can check if the load can be a part of a load
8978/// combine pattern and add corresponding OR roots to the worklist.
8979SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8980 assert(N->getOpcode() == ISD::OR &&
8981 "Can only match load combining against OR nodes");
8982
8983 // Handles simple types only
8984 EVT VT = N->getValueType(0);
8985 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8986 return SDValue();
8987 unsigned ByteWidth = VT.getSizeInBits() / 8;
8988
8989 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8990 auto MemoryByteOffset = [&](SDByteProvider P) {
8991 assert(P.hasSrc() && "Must be a memory byte provider");
8992 auto *Load = cast<LoadSDNode>(P.Src.value());
8993
8994 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
8995
8996 assert(LoadBitWidth % 8 == 0 &&
8997 "can only analyze providers for individual bytes not bit");
8998 unsigned LoadByteWidth = LoadBitWidth / 8;
8999 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9000 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9001 };
9002
9003 std::optional<BaseIndexOffset> Base;
9004 SDValue Chain;
9005
9006 SmallPtrSet<LoadSDNode *, 8> Loads;
9007 std::optional<SDByteProvider> FirstByteProvider;
9008 int64_t FirstOffset = INT64_MAX;
9009
9010 // Check if all the bytes of the OR we are looking at are loaded from the same
9011 // base address. Collect bytes offsets from Base address in ByteOffsets.
9012 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9013 unsigned ZeroExtendedBytes = 0;
9014 for (int i = ByteWidth - 1; i >= 0; --i) {
9015 auto P =
9016 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9017 /*StartingIndex*/ i);
9018 if (!P)
9019 return SDValue();
9020
9021 if (P->isConstantZero()) {
9022 // It's OK for the N most significant bytes to be 0, we can just
9023 // zero-extend the load.
9024 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9025 return SDValue();
9026 continue;
9027 }
9028 assert(P->hasSrc() && "provenance should either be memory or zero");
9029 auto *L = cast<LoadSDNode>(P->Src.value());
9030
9031 // All loads must share the same chain
9032 SDValue LChain = L->getChain();
9033 if (!Chain)
9034 Chain = LChain;
9035 else if (Chain != LChain)
9036 return SDValue();
9037
9038 // Loads must share the same base address
9039 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9040 int64_t ByteOffsetFromBase = 0;
9041
9042 // For vector loads, the expected load combine pattern will have an
9043 // ExtractElement for each index in the vector. While each of these
9044 // ExtractElements will be accessing the same base address as determined
9045 // by the load instruction, the actual bytes they interact with will differ
9046 // due to different ExtractElement indices. To accurately determine the
9047 // byte position of an ExtractElement, we offset the base load ptr with
9048 // the index multiplied by the byte size of each element in the vector.
9049 if (L->getMemoryVT().isVector()) {
9050 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9051 if (LoadWidthInBit % 8 != 0)
9052 return SDValue();
9053 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9054 Ptr.addToOffset(ByteOffsetFromVector);
9055 }
9056
9057 if (!Base)
9058 Base = Ptr;
9059
9060 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9061 return SDValue();
9062
9063 // Calculate the offset of the current byte from the base address
9064 ByteOffsetFromBase += MemoryByteOffset(*P);
9065 ByteOffsets[i] = ByteOffsetFromBase;
9066
9067 // Remember the first byte load
9068 if (ByteOffsetFromBase < FirstOffset) {
9069 FirstByteProvider = P;
9070 FirstOffset = ByteOffsetFromBase;
9071 }
9072
9073 Loads.insert(L);
9074 }
9075
9076 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9077 "memory, so there must be at least one load which produces the value");
9078 assert(Base && "Base address of the accessed memory location must be set");
9079 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9080
9081 bool NeedsZext = ZeroExtendedBytes > 0;
9082
9083 EVT MemVT =
9084 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9085
9086 if (!MemVT.isSimple())
9087 return SDValue();
9088
9089 // Before legalize we can introduce too wide illegal loads which will be later
9090 // split into legal sized loads. This enables us to combine i64 load by i8
9091 // patterns to a couple of i32 loads on 32 bit targets.
9092 if (LegalOperations &&
9094 MemVT))
9095 return SDValue();
9096
9097 // Check if the bytes of the OR we are looking at match with either big or
9098 // little endian value load
9099 std::optional<bool> IsBigEndian = isBigEndian(
9100 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9101 if (!IsBigEndian)
9102 return SDValue();
9103
9104 assert(FirstByteProvider && "must be set");
9105
9106 // Ensure that the first byte is loaded from zero offset of the first load.
9107 // So the combined value can be loaded from the first load address.
9108 if (MemoryByteOffset(*FirstByteProvider) != 0)
9109 return SDValue();
9110 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9111
9112 // The node we are looking at matches with the pattern, check if we can
9113 // replace it with a single (possibly zero-extended) load and bswap + shift if
9114 // needed.
9115
9116 // If the load needs byte swap check if the target supports it
9117 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9118
9119 // Before legalize we can introduce illegal bswaps which will be later
9120 // converted to an explicit bswap sequence. This way we end up with a single
9121 // load and byte shuffling instead of several loads and byte shuffling.
9122 // We do not introduce illegal bswaps when zero-extending as this tends to
9123 // introduce too many arithmetic instructions.
9124 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9125 !TLI.isOperationLegal(ISD::BSWAP, VT))
9126 return SDValue();
9127
9128 // If we need to bswap and zero extend, we have to insert a shift. Check that
9129 // it is legal.
9130 if (NeedsBswap && NeedsZext && LegalOperations &&
9131 !TLI.isOperationLegal(ISD::SHL, VT))
9132 return SDValue();
9133
9134 // Check that a load of the wide type is both allowed and fast on the target
9135 unsigned Fast = 0;
9136 bool Allowed =
9137 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9138 *FirstLoad->getMemOperand(), &Fast);
9139 if (!Allowed || !Fast)
9140 return SDValue();
9141
9142 SDValue NewLoad =
9143 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9144 Chain, FirstLoad->getBasePtr(),
9145 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9146
9147 // Transfer chain users from old loads to the new load.
9148 for (LoadSDNode *L : Loads)
9149 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9150
9151 if (!NeedsBswap)
9152 return NewLoad;
9153
9154 SDValue ShiftedLoad =
9155 NeedsZext
9156 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9157 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9158 SDLoc(N), LegalOperations))
9159 : NewLoad;
9160 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9161}
9162
9163// If the target has andn, bsl, or a similar bit-select instruction,
9164// we want to unfold masked merge, with canonical pattern of:
9165// | A | |B|
9166// ((x ^ y) & m) ^ y
9167// | D |
9168// Into:
9169// (x & m) | (y & ~m)
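// (Per bit: where m is set the result is x, where m is clear it is y, in
// both the folded and the unfolded form.)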
9170// If y is a constant, m is not a 'not', and the 'andn' does not work with
9171// immediates, we unfold into a different pattern:
9172// ~(~x & m) & (m | y)
9173// If x is a constant, m is a 'not', and the 'andn' does not work with
9174// immediates, we unfold into a different pattern:
9175// (x | ~m) & ~(~m & ~y)
9176// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9177// the very least that breaks andnpd / andnps patterns, and because those
9178// patterns are simplified in IR and shouldn't be created in the DAG
9179SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9180 assert(N->getOpcode() == ISD::XOR);
9181
9182 // Don't touch 'not' (i.e. where y = -1).
9183 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9184 return SDValue();
9185
9186 EVT VT = N->getValueType(0);
9187
9188 // There are 3 commutable operators in the pattern,
9189 // so we have to deal with 8 possible variants of the basic pattern.
9190 SDValue X, Y, M;
9191 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9192 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9193 return false;
9194 SDValue Xor = And.getOperand(XorIdx);
9195 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9196 return false;
9197 SDValue Xor0 = Xor.getOperand(0);
9198 SDValue Xor1 = Xor.getOperand(1);
9199 // Don't touch 'not' (i.e. where y = -1).
9200 if (isAllOnesOrAllOnesSplat(Xor1))
9201 return false;
9202 if (Other == Xor0)
9203 std::swap(Xor0, Xor1);
9204 if (Other != Xor1)
9205 return false;
9206 X = Xor0;
9207 Y = Xor1;
9208 M = And.getOperand(XorIdx ? 0 : 1);
9209 return true;
9210 };
9211
9212 SDValue N0 = N->getOperand(0);
9213 SDValue N1 = N->getOperand(1);
9214 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9215 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9216 return SDValue();
9217
9218 // Don't do anything if the mask is constant. This should not be reachable.
9219 // InstCombine should have already unfolded this pattern, and DAGCombiner
9220 // probably shouldn't produce it, too.
9221 if (isa<ConstantSDNode>(M.getNode()))
9222 return SDValue();
9223
9224 // We can transform if the target has AndNot
9225 if (!TLI.hasAndNot(M))
9226 return SDValue();
9227
9228 SDLoc DL(N);
9229
9230 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9231 // a bitwise not that would already allow ANDN to be used.
9232 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9233 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9234 // If not, we need to do a bit more work to make sure andn is still used.
9235 SDValue NotX = DAG.getNOT(DL, X, VT);
9236 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9237 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9238 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9239 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9240 }
9241
9242 // If X is a constant and M is a bitwise not, check that 'andn' works with
9243 // immediates.
9244 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9245 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9246 // If not, we need to do a bit more work to make sure andn is still used.
9247 SDValue NotM = M.getOperand(0);
9248 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9249 SDValue NotY = DAG.getNOT(DL, Y, VT);
9250 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9251 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9252 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9253 }
9254
9255 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9256 SDValue NotM = DAG.getNOT(DL, M, VT);
9257 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9258
9259 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9260}
9261
9262SDValue DAGCombiner::visitXOR(SDNode *N) {
9263 SDValue N0 = N->getOperand(0);
9264 SDValue N1 = N->getOperand(1);
9265 EVT VT = N0.getValueType();
9266 SDLoc DL(N);
9267
9268 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9269 if (N0.isUndef() && N1.isUndef())
9270 return DAG.getConstant(0, DL, VT);
9271
9272 // fold (xor x, undef) -> undef
9273 if (N0.isUndef())
9274 return N0;
9275 if (N1.isUndef())
9276 return N1;
9277
9278 // fold (xor c1, c2) -> c1^c2
9279 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9280 return C;
9281
9282 // canonicalize constant to RHS
9283 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9284 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9285 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9286
9287 // fold vector ops
9288 if (VT.isVector()) {
9289 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9290 return FoldedVOp;
9291
9292 // fold (xor x, 0) -> x, vector edition
9293 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9294 return N0;
9295 }
9296
9297 // fold (xor x, 0) -> x
9298 if (isNullConstant(N1))
9299 return N0;
9300
9301 if (SDValue NewSel = foldBinOpIntoSelect(N))
9302 return NewSel;
9303
9304 // reassociate xor
9305 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9306 return RXOR;
9307
9308 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9309 if (SDValue SD =
9310 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9311 return SD;
9312
9313 // fold (a^b) -> (a|b) iff a and b share no bits.
9314 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9315 DAG.haveNoCommonBitsSet(N0, N1)) {
9316 SDNodeFlags Flags;
9317 Flags.setDisjoint(true);
9318 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9319 }
9320
9321 // look for 'add-like' folds:
9322 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9323 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9324 isMinSignedConstant(N1))
9325 if (SDValue Combined = visitADDLike(N))
9326 return Combined;
9327
9328 // fold !(x cc y) -> (x !cc y)
9329 unsigned N0Opcode = N0.getOpcode();
9330 SDValue LHS, RHS, CC;
9331 if (TLI.isConstTrueVal(N1) &&
9332 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9333 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9334 LHS.getValueType());
9335 if (!LegalOperations ||
9336 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9337 switch (N0Opcode) {
9338 default:
9339 llvm_unreachable("Unhandled SetCC Equivalent!");
9340 case ISD::SETCC:
9341 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9342 case ISD::SELECT_CC:
9343 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9344 N0.getOperand(3), NotCC);
9345 case ISD::STRICT_FSETCC:
9346 case ISD::STRICT_FSETCCS: {
9347 if (N0.hasOneUse()) {
9348 // FIXME Can we handle multiple uses? Could we token factor the chain
9349 // results from the new/old setcc?
9350 SDValue SetCC =
9351 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9352 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9353 CombineTo(N, SetCC);
9354 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9355 recursivelyDeleteUnusedNodes(N0.getNode());
9356 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9357 }
9358 break;
9359 }
9360 }
9361 }
9362 }
9363
9364 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9365 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9366 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9367 SDValue V = N0.getOperand(0);
9368 SDLoc DL0(N0);
9369 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9370 DAG.getConstant(1, DL0, V.getValueType()));
9371 AddToWorklist(V.getNode());
9372 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9373 }
9374
9375 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9376 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9377 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9378 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9379 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9380 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9381 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9382 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9383 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9384 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9385 }
9386 }
9387 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9388 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9389 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9390 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9391 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9392 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9393 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9394 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9395 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9396 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9397 }
9398 }
9399
9400 // fold (not (neg x)) -> (add X, -1)
9401 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9402 // Y is a constant or the subtract has a single use.
9403 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9404 isNullConstant(N0.getOperand(0))) {
9405 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9406 DAG.getAllOnesConstant(DL, VT));
9407 }
9408
9409 // fold (not (add X, -1)) -> (neg X)
9410 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9411 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9412 return DAG.getNegative(N0.getOperand(0), DL, VT);
9413 }
9414
9415 // fold (xor (and x, y), y) -> (and (not x), y)
9416 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9417 SDValue X = N0.getOperand(0);
9418 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9419 AddToWorklist(NotX.getNode());
9420 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9421 }
9422
9423 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
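// (Y is 0 when X >= 0 and all-ones when X < 0, so (X + Y) ^ Y yields X for
// X >= 0 and ~(X - 1) == -X for X < 0 -- the classic branchless abs idiom.)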
9424 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9425 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9426 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9427 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9428 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9429 SDValue S0 = S.getOperand(0);
9430 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9431 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9432 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9433 return DAG.getNode(ISD::ABS, DL, VT, S0);
9434 }
9435 }
9436
9437 // fold (xor x, x) -> 0
9438 if (N0 == N1)
9439 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9440
9441 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9442 // Here is a concrete example of this equivalence:
9443 // i16 x == 14
9444 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9445 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9446 //
9447 // =>
9448 //
9449 // i16 ~1 == 0b1111111111111110
9450 // i16 rol(~1, 14) == 0b1011111111111111
9451 //
9452 // Some additional tips to help conceptualize this transform:
9453 // - Try to see the operation as placing a single zero in a value of all ones.
9454 // - There exists no value for x which would allow the result to contain zero.
9455 // - Values of x larger than the bitwidth are undefined and do not require a
9456 // consistent result.
9457 // - Pushing the zero left requires shifting one-bits in from the right.
9458 // A rotate left of ~1 is a nice way of achieving the desired result.
9459 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9460 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9461 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9462 N0.getOperand(1));
9463 }
9464
9465 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9466 if (N0Opcode == N1.getOpcode())
9467 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9468 return V;
9469
9470 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9471 return R;
9472 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9473 return R;
9474 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9475 return R;
9476
9477 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9478 if (SDValue MM = unfoldMaskedMerge(N))
9479 return MM;
9480
9481 // Simplify the expression using non-local knowledge.
9482 if (SimplifyDemandedBits(SDValue(N, 0)))
9483 return SDValue(N, 0);
9484
9485 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9486 return Combined;
9487
9488 return SDValue();
9489}
9490
9491/// If we have a shift-by-constant of a bitwise logic op that itself has a
9492/// shift-by-constant operand with identical opcode, we may be able to convert
9493/// that into 2 independent shifts followed by the logic op. This is a
9494/// throughput improvement.
9495 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9496 // Match a one-use bitwise logic op.
9497 SDValue LogicOp = Shift->getOperand(0);
9498 if (!LogicOp.hasOneUse())
9499 return SDValue();
9500
9501 unsigned LogicOpcode = LogicOp.getOpcode();
9502 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9503 LogicOpcode != ISD::XOR)
9504 return SDValue();
9505
9506 // Find a matching one-use shift by constant.
9507 unsigned ShiftOpcode = Shift->getOpcode();
9508 SDValue C1 = Shift->getOperand(1);
9509 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9510 assert(C1Node && "Expected a shift with constant operand");
9511 const APInt &C1Val = C1Node->getAPIntValue();
9512 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9513 const APInt *&ShiftAmtVal) {
9514 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9515 return false;
9516
9517 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9518 if (!ShiftCNode)
9519 return false;
9520
9521 // Capture the shifted operand and shift amount value.
9522 ShiftOp = V.getOperand(0);
9523 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9524
9525 // Shift amount types do not have to match their operand type, so check that
9526 // the constants are the same width.
9527 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9528 return false;
9529
9530 // The fold is not valid if the sum of the shift values doesn't fit in the
9531 // given shift amount type.
9532 bool Overflow = false;
9533 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9534 if (Overflow)
9535 return false;
9536
9537 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9538 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9539 return false;
9540
9541 return true;
9542 };
9543
9544 // Logic ops are commutative, so check each operand for a match.
9545 SDValue X, Y;
9546 const APInt *C0Val;
9547 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9548 Y = LogicOp.getOperand(1);
9549 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9550 Y = LogicOp.getOperand(0);
9551 else
9552 return SDValue();
9553
9554 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9555 SDLoc DL(Shift);
9556 EVT VT = Shift->getValueType(0);
9557 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9558 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9559 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9560 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9561 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9562 LogicOp->getFlags());
9563}
9564
9565/// Handle transforms common to the three shifts, when the shift amount is a
9566/// constant.
9567/// We are looking for: (shift being one of shl/sra/srl)
9568/// shift (binop X, C0), C1
9569/// And want to transform into:
9570/// binop (shift X, C1), (shift C0, C1)
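/// For example: (shl (or X, 0xFF), 8) --> (or (shl X, 8), 0xFF00).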
9571SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9572 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9573
9574 // Do not turn a 'not' into a regular xor.
9575 if (isBitwiseNot(N->getOperand(0)))
9576 return SDValue();
9577
9578 // The inner binop must be one-use, since we want to replace it.
9579 SDValue LHS = N->getOperand(0);
9580 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9581 return SDValue();
9582
9583 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9584 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9585 return R;
9586
9587 // We want to pull some binops through shifts, so that we have (and (shift))
9588 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9589 // thing happens with address calculations, so it's important to canonicalize
9590 // it.
9591 switch (LHS.getOpcode()) {
9592 default:
9593 return SDValue();
9594 case ISD::OR:
9595 case ISD::XOR:
9596 case ISD::AND:
9597 break;
9598 case ISD::ADD:
9599 if (N->getOpcode() != ISD::SHL)
9600 return SDValue(); // only shl(add) not sr[al](add).
9601 break;
9602 }
9603
9604 // FIXME: disable this unless the input to the binop is a shift by a constant
9605 // or is copy/select. Enable this in other cases when figure out it's exactly
9606 // profitable.
9607 SDValue BinOpLHSVal = LHS.getOperand(0);
9608 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9609 BinOpLHSVal.getOpcode() == ISD::SRA ||
9610 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9611 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9612 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9613 BinOpLHSVal.getOpcode() == ISD::SELECT;
9614
9615 if (!IsShiftByConstant && !IsCopyOrSelect)
9616 return SDValue();
9617
9618 if (IsCopyOrSelect && N->hasOneUse())
9619 return SDValue();
9620
9621 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9622 SDLoc DL(N);
9623 EVT VT = N->getValueType(0);
9624 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9625 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9626 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9627 N->getOperand(1));
9628 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9629 }
9630
9631 return SDValue();
9632}
9633
9634SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9635 assert(N->getOpcode() == ISD::TRUNCATE);
9636 assert(N->getOperand(0).getOpcode() == ISD::AND);
9637
9638 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9639 EVT TruncVT = N->getValueType(0);
9640 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9641 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9642 SDValue N01 = N->getOperand(0).getOperand(1);
9643 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9644 SDLoc DL(N);
9645 SDValue N00 = N->getOperand(0).getOperand(0);
9646 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9647 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9648 AddToWorklist(Trunc00.getNode());
9649 AddToWorklist(Trunc01.getNode());
9650 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9651 }
9652 }
9653
9654 return SDValue();
9655}
9656
9657SDValue DAGCombiner::visitRotate(SDNode *N) {
9658 SDLoc dl(N);
9659 SDValue N0 = N->getOperand(0);
9660 SDValue N1 = N->getOperand(1);
9661 EVT VT = N->getValueType(0);
9662 unsigned Bitsize = VT.getScalarSizeInBits();
9663
9664 // fold (rot x, 0) -> x
9665 if (isNullOrNullSplat(N1))
9666 return N0;
9667
9668 // fold (rot x, c) -> x iff (c % BitSize) == 0
9669 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9670 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9671 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9672 return N0;
9673 }
9674
9675 // fold (rot x, c) -> (rot x, c % BitSize)
9676 bool OutOfRange = false;
9677 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9678 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9679 return true;
9680 };
9681 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9682 EVT AmtVT = N1.getValueType();
9683 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9684 if (SDValue Amt =
9685 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9686 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9687 }
9688
9689 // rot i16 X, 8 --> bswap X
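// (Rotating a 16-bit value by 8 in either direction swaps its two bytes, which
// is exactly what bswap does for i16.)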
9690 auto *RotAmtC = isConstOrConstSplat(N1);
9691 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9692 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9693 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9694
9695 // Simplify the operands using demanded-bits information.
9696 if (SimplifyDemandedBits(SDValue(N, 0)))
9697 return SDValue(N, 0);
9698
9699 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9700 if (N1.getOpcode() == ISD::TRUNCATE &&
9701 N1.getOperand(0).getOpcode() == ISD::AND) {
9702 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9703 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9704 }
9705
9706 unsigned NextOp = N0.getOpcode();
9707
9708 // fold (rot* (rot* x, c2), c1)
9709 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
9710 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9711 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9712 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9713 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9714 EVT ShiftVT = C1->getValueType(0);
9715 bool SameSide = (N->getOpcode() == NextOp);
9716 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9717 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9718 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9719 {N1, BitsizeC});
9720 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9721 {N0.getOperand(1), BitsizeC});
9722 if (Norm1 && Norm2)
9723 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9724 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9725 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9726 {CombinedShift, BitsizeC});
9727 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9728 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9729 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9730 CombinedShiftNorm);
9731 }
9732 }
9733 }
9734 return SDValue();
9735}
9736
9737SDValue DAGCombiner::visitSHL(SDNode *N) {
9738 SDValue N0 = N->getOperand(0);
9739 SDValue N1 = N->getOperand(1);
9740 if (SDValue V = DAG.simplifyShift(N0, N1))
9741 return V;
9742
9743 SDLoc DL(N);
9744 EVT VT = N0.getValueType();
9745 EVT ShiftVT = N1.getValueType();
9746 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9747
9748 // fold (shl c1, c2) -> c1<<c2
9749 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9750 return C;
9751
9752 // fold vector ops
9753 if (VT.isVector()) {
9754 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9755 return FoldedVOp;
9756
9757 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9758 // If setcc produces all-one true value then:
9759 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9760 if (N1CV && N1CV->isConstant()) {
9761 if (N0.getOpcode() == ISD::AND) {
9762 SDValue N00 = N0->getOperand(0);
9763 SDValue N01 = N0->getOperand(1);
9764 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9765
9766 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9767 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9768 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9769 if (SDValue C =
9770 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9771 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9772 }
9773 }
9774 }
9775 }
9776
9777 if (SDValue NewSel = foldBinOpIntoSelect(N))
9778 return NewSel;
9779
9780 // if (shl x, c) is known to be zero, return 0
9781 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9782 return DAG.getConstant(0, DL, VT);
9783
9784 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9785 if (N1.getOpcode() == ISD::TRUNCATE &&
9786 N1.getOperand(0).getOpcode() == ISD::AND) {
9787 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9788 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9789 }
9790
9791 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9792 if (N0.getOpcode() == ISD::SHL) {
9793 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9794 ConstantSDNode *RHS) {
9795 APInt c1 = LHS->getAPIntValue();
9796 APInt c2 = RHS->getAPIntValue();
9797 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9798 return (c1 + c2).uge(OpSizeInBits);
9799 };
9800 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9801 return DAG.getConstant(0, DL, VT);
9802
9803 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9804 ConstantSDNode *RHS) {
9805 APInt c1 = LHS->getAPIntValue();
9806 APInt c2 = RHS->getAPIntValue();
9807 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9808 return (c1 + c2).ult(OpSizeInBits);
9809 };
9810 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9811 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9812 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9813 }
9814 }
9815
9816 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9817 // For this to be valid, the second form must not preserve any of the bits
9818 // that are shifted out by the inner shift in the first form. This means
9819 // the outer shift size must be >= the number of bits added by the ext.
9820 // As a corollary, we don't care what kind of ext it is.
9821 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9822 N0.getOpcode() == ISD::ANY_EXTEND ||
9823 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9824 N0.getOperand(0).getOpcode() == ISD::SHL) {
9825 SDValue N0Op0 = N0.getOperand(0);
9826 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9827 EVT InnerVT = N0Op0.getValueType();
9828 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9829
9830 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9831 ConstantSDNode *RHS) {
9832 APInt c1 = LHS->getAPIntValue();
9833 APInt c2 = RHS->getAPIntValue();
9834 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9835 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9836 (c1 + c2).uge(OpSizeInBits);
9837 };
9838 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9839 /*AllowUndefs*/ false,
9840 /*AllowTypeMismatch*/ true))
9841 return DAG.getConstant(0, DL, VT);
9842
9843 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9844 ConstantSDNode *RHS) {
9845 APInt c1 = LHS->getAPIntValue();
9846 APInt c2 = RHS->getAPIntValue();
9847 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9848 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9849 (c1 + c2).ult(OpSizeInBits);
9850 };
9851 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9852 /*AllowUndefs*/ false,
9853 /*AllowTypeMismatch*/ true)) {
9854 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9855 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9856 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9857 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9858 }
9859 }
9860
9861 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9862 // Only fold this if the inner zext has no other uses to avoid increasing
9863 // the total number of instructions.
9864 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9865 N0.getOperand(0).getOpcode() == ISD::SRL) {
9866 SDValue N0Op0 = N0.getOperand(0);
9867 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9868
9869 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9870 APInt c1 = LHS->getAPIntValue();
9871 APInt c2 = RHS->getAPIntValue();
9872 zeroExtendToMatch(c1, c2);
9873 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9874 };
9875 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9876 /*AllowUndefs*/ false,
9877 /*AllowTypeMismatch*/ true)) {
9878 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9879 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9880 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9881 AddToWorklist(NewSHL.getNode());
9882 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9883 }
9884 }
9885
9886 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9887 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9888 ConstantSDNode *RHS) {
9889 const APInt &LHSC = LHS->getAPIntValue();
9890 const APInt &RHSC = RHS->getAPIntValue();
9891 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9892 LHSC.getZExtValue() <= RHSC.getZExtValue();
9893 };
9894
9895 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9896 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
9897 if (N0->getFlags().hasExact()) {
9898 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9899 /*AllowUndefs*/ false,
9900 /*AllowTypeMismatch*/ true)) {
9901 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9902 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9903 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9904 }
9905 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9906 /*AllowUndefs*/ false,
9907 /*AllowTypeMismatch*/ true)) {
9908 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9909 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9910 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9911 }
9912 }
9913
9914 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
9915 // (and (srl x, (sub c1, c2)), MASK)
9916 // Only fold this if the inner shift has no other uses -- if it does,
9917 // folding this will increase the total number of instructions.
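// For illustration with i8 values: (shl (srl x, 3), 1) --> (and (srl x, 2), 0b00111110).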
9918 if (N0.getOpcode() == ISD::SRL &&
9919 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9920 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9921 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9922 /*AllowUndefs*/ false,
9923 /*AllowTypeMismatch*/ true)) {
9924 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9925 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9926 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9927 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9928 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9929 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9930 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9931 }
9932 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9933 /*AllowUndefs*/ false,
9934 /*AllowTypeMismatch*/ true)) {
9935 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9936 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9937 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9938 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9939 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9940 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9941 }
9942 }
9943 }
9944
9945 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9946 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9947 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9948 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9949 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9950 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9951 }
9952
9953 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9954 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9955 // Variant of version done on multiply, except mul by a power of 2 is turned
9956 // into a shift.
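// For example, (shl (add x, 3), 4) --> (add (shl x, 4), 48), since
// (x + 3) << 4 == (x << 4) + (3 << 4).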
9957 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9958 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
9959 SDValue N01 = N0.getOperand(1);
9960 if (SDValue Shl1 =
9961 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
9962 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9963 AddToWorklist(Shl0.getNode());
9964 SDNodeFlags Flags;
9965 // Preserve the disjoint flag for Or.
9966 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
9967 Flags.setDisjoint(true);
9968 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
9969 }
9970 }
9971
9972 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
9973 // TODO: Add zext/add_nuw variant with suitable test coverage
9974 // TODO: Should we limit this with isLegalAddImmediate?
9975 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
9976 N0.getOperand(0).getOpcode() == ISD::ADD &&
9977 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
9978 N0.getOperand(0)->hasOneUse() &&
9979 TLI.isDesirableToCommuteWithShift(N, Level)) {
9980 SDValue Add = N0.getOperand(0);
9981 SDLoc DL(N0);
9982 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
9983 {Add.getOperand(1)})) {
9984 if (SDValue ShlC =
9985 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
9986 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
9987 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
9988 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
9989 }
9990 }
9991 }
9992
9993 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9994 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9995 SDValue N01 = N0.getOperand(1);
9996 if (SDValue Shl =
9997 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
9998 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
9999 }
10000
10001 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10002 if (N1C && !N1C->isOpaque())
10003 if (SDValue NewSHL = visitShiftByConstant(N))
10004 return NewSHL;
10005
10006 if (SimplifyDemandedBits(SDValue(N, 0)))
10007 return SDValue(N, 0);
10008
10009 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10010 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10011 const APInt &C0 = N0.getConstantOperandAPInt(0);
10012 const APInt &C1 = N1C->getAPIntValue();
10013 return DAG.getVScale(DL, VT, C0 << C1);
10014 }
10015
10016 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10017 APInt ShlVal;
10018 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10019 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10020 const APInt &C0 = N0.getConstantOperandAPInt(0);
10021 if (ShlVal.ult(C0.getBitWidth())) {
10022 APInt NewStep = C0 << ShlVal;
10023 return DAG.getStepVector(DL, VT, NewStep);
10024 }
10025 }
10026
10027 return SDValue();
10028}
10029
10030// Transform a right shift of a multiply into a multiply-high.
10031// Examples:
10032 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10033 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10034 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10035 const TargetLowering &TLI) {
10036 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10037 "SRL or SRA node is required here!");
10038
10039 // Check the shift amount. Proceed with the transformation if the shift
10040 // amount is constant.
10041 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10042 if (!ShiftAmtSrc)
10043 return SDValue();
10044
10045 // The operation feeding into the shift must be a multiply.
10046 SDValue ShiftOperand = N->getOperand(0);
10047 if (ShiftOperand.getOpcode() != ISD::MUL)
10048 return SDValue();
10049
10050 // Both operands must be equivalent extend nodes.
10051 SDValue LeftOp = ShiftOperand.getOperand(0);
10052 SDValue RightOp = ShiftOperand.getOperand(1);
10053
10054 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10055 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10056
10057 if (!IsSignExt && !IsZeroExt)
10058 return SDValue();
10059
10060 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10061 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10062
10063 // return true if U may use the lower bits of its operands
10064 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10065 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10066 return true;
10067 }
10068 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10069 if (!UShiftAmtSrc) {
10070 return true;
10071 }
10072 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10073 return UShiftAmt < NarrowVTSize;
10074 };
10075
10076 // If the lower part of the MUL is also used and MUL_LOHI is supported
10077 // do not introduce the MULH in favor of MUL_LOHI
10078 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10079 if (!ShiftOperand.hasOneUse() &&
10080 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10081 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10082 return SDValue();
10083 }
10084
10085 SDValue MulhRightOp;
10086 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10087 unsigned ActiveBits = IsSignExt
10088 ? Constant->getAPIntValue().getSignificantBits()
10089 : Constant->getAPIntValue().getActiveBits();
10090 if (ActiveBits > NarrowVTSize)
10091 return SDValue();
10092 MulhRightOp = DAG.getConstant(
10093 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10094 NarrowVT);
10095 } else {
10096 if (LeftOp.getOpcode() != RightOp.getOpcode())
10097 return SDValue();
10098 // Check that the two extend nodes are the same type.
10099 if (NarrowVT != RightOp.getOperand(0).getValueType())
10100 return SDValue();
10101 MulhRightOp = RightOp.getOperand(0);
10102 }
10103
10104 EVT WideVT = LeftOp.getValueType();
10105 // Proceed with the transformation if the wide types match.
10106 assert((WideVT == RightOp.getValueType()) &&
10107 "Cannot have a multiply node with two different operand types.");
10108
10109 // Proceed with the transformation if the wide type is twice as large
10110 // as the narrow type.
10111 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10112 return SDValue();
10113
10114 // Check the shift amount with the narrow type size.
10115 // Proceed with the transformation if the shift amount is the width
10116 // of the narrow type.
10117 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10118 if (ShiftAmt != NarrowVTSize)
10119 return SDValue();
10120
10121 // If the operation feeding into the MUL is a sign extend (sext),
10122 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10123 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10124
10125 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10126 // or if it is a vector type then we could transform to an acceptable type and
10127 // rely on legalization to split/combine the result.
10128 if (NarrowVT.isVector()) {
10129 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10130 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10131 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10132 return SDValue();
10133 } else {
10134 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10135 return SDValue();
10136 }
10137
10138 SDValue Result =
10139 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10140 bool IsSigned = N->getOpcode() == ISD::SRA;
10141 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10142}
10143
10144// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10145 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
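// For example, bswap(and(bswap(x), y)) == and(x, bswap(y)): the bit/byte
// reordering distributes over bitwise logic and applying it twice is a no-op.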
10146 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10147 unsigned Opcode = N->getOpcode();
10148 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10149 return SDValue();
10150
10151 SDValue N0 = N->getOperand(0);
10152 EVT VT = N->getValueType(0);
10153 SDLoc DL(N);
10154 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10155 SDValue OldLHS = N0.getOperand(0);
10156 SDValue OldRHS = N0.getOperand(1);
10157
10158 // If both operands are bswap/bitreverse, ignore the multiuse
10159 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10160 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10161 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10162 OldRHS.getOperand(0));
10163 }
10164
10165 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10166 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10167 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10168 NewBitReorder);
10169 }
10170
10171 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10172 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10173 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10174 OldRHS.getOperand(0));
10175 }
10176 }
10177 return SDValue();
10178}
10179
10180SDValue DAGCombiner::visitSRA(SDNode *N) {
10181 SDValue N0 = N->getOperand(0);
10182 SDValue N1 = N->getOperand(1);
10183 if (SDValue V = DAG.simplifyShift(N0, N1))
10184 return V;
10185
10186 SDLoc DL(N);
10187 EVT VT = N0.getValueType();
10188 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10189
10190 // fold (sra c1, c2) -> c1 >>s c2
10191 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10192 return C;
10193
10194 // Arithmetic shifting an all-sign-bit value is a no-op.
10195 // fold (sra 0, x) -> 0
10196 // fold (sra -1, x) -> -1
10197 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10198 return N0;
10199
10200 // fold vector ops
10201 if (VT.isVector())
10202 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10203 return FoldedVOp;
10204
10205 if (SDValue NewSel = foldBinOpIntoSelect(N))
10206 return NewSel;
10207
10208 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10209
10210 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10211 // clamp (add c1, c2) to max shift.
10212 if (N0.getOpcode() == ISD::SRA) {
10213 EVT ShiftVT = N1.getValueType();
10214 EVT ShiftSVT = ShiftVT.getScalarType();
10215 SmallVector<SDValue, 16> ShiftValues;
10216
10217 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10218 APInt c1 = LHS->getAPIntValue();
10219 APInt c2 = RHS->getAPIntValue();
10220 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10221 APInt Sum = c1 + c2;
10222 unsigned ShiftSum =
10223 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10224 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10225 return true;
10226 };
10227 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10228 SDValue ShiftValue;
10229 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10230 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10231 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10232 assert(ShiftValues.size() == 1 &&
10233 "Expected matchBinaryPredicate to return one element for "
10234 "SPLAT_VECTORs");
10235 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10236 } else
10237 ShiftValue = ShiftValues[0];
10238 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10239 }
10240 }
10241
10242 // fold (sra (shl X, m), (sub result_size, n))
10243 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10244 // result_size - n != m.
10245 // If truncate is free for the target, sext(shl) is likely to result in better
10246 // code.
10247 if (N0.getOpcode() == ISD::SHL && N1C) {
10248 // Get the two constants of the shifts, CN0 = m, CN = n.
10249 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10250 if (N01C) {
10251 LLVMContext &Ctx = *DAG.getContext();
10252 // Determine what the truncate's result bitsize and type would be.
10253 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10254
10255 if (VT.isVector())
10256 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10257
10258 // Determine the residual right-shift amount.
10259 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10260
10261 // If the shift is not a no-op (in which case this should be just a sign
10262 // extend already), the truncated to type is legal, sign_extend is legal
10263 // on that type, and the truncate to that type is both legal and free,
10264 // perform the transform.
10265 if ((ShiftAmt > 0) &&
10266 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10267 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10268 TLI.isTruncateFree(VT, TruncVT)) {
10269 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10270 getShiftAmountTy(N0.getOperand(0).getValueType()));
10271 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10272 N0.getOperand(0), Amt);
10273 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10274 Shift);
10275 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10276 N->getValueType(0), Trunc);
10277 }
10278 }
10279 }
10280
10281 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10282 // sra (add (shl X, N1C), AddC), N1C -->
10283 // sext (add (trunc X to (width - N1C)), AddC')
10284 // sra (sub AddC, (shl X, N1C)), N1C -->
10285 // sext (sub AddC1',(trunc X to (width - N1C)))
10286 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10287 N0.hasOneUse()) {
10288 bool IsAdd = N0.getOpcode() == ISD::ADD;
10289 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10290 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10291 Shl.hasOneUse()) {
10292 // TODO: AddC does not need to be a splat.
10293 if (ConstantSDNode *AddC =
10294 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10295 // Determine what the truncate's type would be and ask the target if
10296 // that is a free operation.
10297 LLVMContext &Ctx = *DAG.getContext();
10298 unsigned ShiftAmt = N1C->getZExtValue();
10299 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10300 if (VT.isVector())
10301 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10302
10303 // TODO: The simple type check probably belongs in the default hook
10304 // implementation and/or target-specific overrides (because
10305 // non-simple types likely require masking when legalized), but
10306 // that restriction may conflict with other transforms.
10307 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10308 TLI.isTruncateFree(VT, TruncVT)) {
10309 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10310 SDValue ShiftC =
10311 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10312 TruncVT.getScalarSizeInBits()),
10313 DL, TruncVT);
10314 SDValue Add;
10315 if (IsAdd)
10316 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10317 else
10318 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10319 return DAG.getSExtOrTrunc(Add, DL, VT);
10320 }
10321 }
10322 }
10323 }
10324
10325 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10326 if (N1.getOpcode() == ISD::TRUNCATE &&
10327 N1.getOperand(0).getOpcode() == ISD::AND) {
10328 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10329 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10330 }
10331
10332 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10333 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10334 // if c1 is equal to the number of bits the trunc removes
10335 // TODO - support non-uniform vector shift amounts.
10336 if (N0.getOpcode() == ISD::TRUNCATE &&
10337 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10338 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10339 N0.getOperand(0).hasOneUse() &&
10340 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10341 SDValue N0Op0 = N0.getOperand(0);
10342 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10343 EVT LargeVT = N0Op0.getValueType();
10344 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10345 if (LargeShift->getAPIntValue() == TruncBits) {
10346 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10347 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10348 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10349 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10350 SDValue SRA =
10351 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10352 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10353 }
10354 }
10355 }
10356
10357 // Simplify, based on bits shifted out of the LHS.
10358 if (SimplifyDemandedBits(SDValue(N, 0)))
10359 return SDValue(N, 0);
10360
10361 // If the sign bit is known to be zero, switch this to a SRL.
10362 if (DAG.SignBitIsZero(N0))
10363 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10364
10365 if (N1C && !N1C->isOpaque())
10366 if (SDValue NewSRA = visitShiftByConstant(N))
10367 return NewSRA;
10368
10369 // Try to transform this shift into a multiply-high if
10370 // it matches the appropriate pattern detected in combineShiftToMULH.
10371 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10372 return MULH;
10373
10374 // Attempt to convert a sra of a load into a narrower sign-extending load.
10375 if (SDValue NarrowLoad = reduceLoadWidth(N))
10376 return NarrowLoad;
10377
10378 return SDValue();
10379}
10380
10381SDValue DAGCombiner::visitSRL(SDNode *N) {
10382 SDValue N0 = N->getOperand(0);
10383 SDValue N1 = N->getOperand(1);
10384 if (SDValue V = DAG.simplifyShift(N0, N1))
10385 return V;
10386
10387 SDLoc DL(N);
10388 EVT VT = N0.getValueType();
10389 EVT ShiftVT = N1.getValueType();
10390 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10391
10392 // fold (srl c1, c2) -> c1 >>u c2
10393 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10394 return C;
10395
10396 // fold vector ops
10397 if (VT.isVector())
10398 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10399 return FoldedVOp;
10400
10401 if (SDValue NewSel = foldBinOpIntoSelect(N))
10402 return NewSel;
10403
10404 // if (srl x, c) is known to be zero, return 0
10405 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10406 if (N1C &&
10407 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10408 return DAG.getConstant(0, DL, VT);
10409
10410 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10411 if (N0.getOpcode() == ISD::SRL) {
10412 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10413 ConstantSDNode *RHS) {
10414 APInt c1 = LHS->getAPIntValue();
10415 APInt c2 = RHS->getAPIntValue();
10416 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10417 return (c1 + c2).uge(OpSizeInBits);
10418 };
10419 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10420 return DAG.getConstant(0, DL, VT);
10421
10422 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10423 ConstantSDNode *RHS) {
10424 APInt c1 = LHS->getAPIntValue();
10425 APInt c2 = RHS->getAPIntValue();
10426 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10427 return (c1 + c2).ult(OpSizeInBits);
10428 };
10429 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10430 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10431 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10432 }
10433 }
10434
10435 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10436 N0.getOperand(0).getOpcode() == ISD::SRL) {
10437 SDValue InnerShift = N0.getOperand(0);
10438 // TODO - support non-uniform vector shift amounts.
10439 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10440 uint64_t c1 = N001C->getZExtValue();
10441 uint64_t c2 = N1C->getZExtValue();
10442 EVT InnerShiftVT = InnerShift.getValueType();
10443 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10444 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10445 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10446 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10447 if (c1 + OpSizeInBits == InnerShiftSize) {
10448 if (c1 + c2 >= InnerShiftSize)
10449 return DAG.getConstant(0, DL, VT);
10450 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10451 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10452 InnerShift.getOperand(0), NewShiftAmt);
10453 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10454 }
10455 // In the more general case, we can clear the high bits after the shift:
10456 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10457 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10458 c1 + c2 < InnerShiftSize) {
10459 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10460 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10461 InnerShift.getOperand(0), NewShiftAmt);
10462 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10463 OpSizeInBits - c2),
10464 DL, InnerShiftVT);
10465 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10466 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10467 }
10468 }
10469 }
10470
10471 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10472 // (and (srl x, (sub c2, c1)), MASK)
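// For illustration with i8 values: (srl (shl x, 3), 1) --> (and (shl x, 2), 0b01111100).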
10473 if (N0.getOpcode() == ISD::SHL &&
10474 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10475 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10476 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10477 ConstantSDNode *RHS) {
10478 const APInt &LHSC = LHS->getAPIntValue();
10479 const APInt &RHSC = RHS->getAPIntValue();
10480 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10481 LHSC.getZExtValue() <= RHSC.getZExtValue();
10482 };
10483 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10484 /*AllowUndefs*/ false,
10485 /*AllowTypeMismatch*/ true)) {
10486 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10487 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10488 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10489 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10490 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10491 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10492 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10493 }
10494 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10495 /*AllowUndefs*/ false,
10496 /*AllowTypeMismatch*/ true)) {
10497 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10498 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10499 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10500 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10501 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10502 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10503 }
10504 }
10505
10506 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10507 // TODO - support non-uniform vector shift amounts.
10508 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10509 // Shifting in all undef bits?
10510 EVT SmallVT = N0.getOperand(0).getValueType();
10511 unsigned BitSize = SmallVT.getScalarSizeInBits();
10512 if (N1C->getAPIntValue().uge(BitSize))
10513 return DAG.getUNDEF(VT);
10514
10515 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10516 uint64_t ShiftAmt = N1C->getZExtValue();
10517 SDLoc DL0(N0);
10518 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10519 N0.getOperand(0),
10520 DAG.getConstant(ShiftAmt, DL0,
10521 getShiftAmountTy(SmallVT)));
10522 AddToWorklist(SmallShift.getNode());
10523 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10524 return DAG.getNode(ISD::AND, DL, VT,
10525 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10526 DAG.getConstant(Mask, DL, VT));
10527 }
10528 }
10529
10530 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10531 // bit, which is unmodified by sra.
10532 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10533 if (N0.getOpcode() == ISD::SRA)
10534 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10535 }
10536
10537 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10538 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10539 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10540 isPowerOf2_32(OpSizeInBits) &&
10541 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10542 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10543
10544 // If any of the input bits are KnownOne, then the input couldn't be all
10545 // zeros, thus the result of the srl will always be zero.
10546 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10547
10548 // If all of the bits input to the ctlz node are known to be zero, then
10549 // the result of the ctlz is "32" and the result of the shift is one.
10550 APInt UnknownBits = ~Known.Zero;
10551 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10552
10553 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10554 if (UnknownBits.isPowerOf2()) {
10555 // Okay, we know that only the single bit specified by UnknownBits
10556 // could be set on input to the CTLZ node. If this bit is set, the SRL
10557 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10558 // to an SRL/XOR pair, which is likely to simplify more.
10559 unsigned ShAmt = UnknownBits.countr_zero();
10560 SDValue Op = N0.getOperand(0);
10561
10562 if (ShAmt) {
10563 SDLoc DL(N0);
10564 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10565 DAG.getConstant(ShAmt, DL,
10566 getShiftAmountTy(Op.getValueType())));
10567 AddToWorklist(Op.getNode());
10568 }
10569 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10570 }
10571 }
10572
10573 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10574 if (N1.getOpcode() == ISD::TRUNCATE &&
10575 N1.getOperand(0).getOpcode() == ISD::AND) {
10576 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10577 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10578 }
10579
10580 // fold operands of srl based on knowledge that the low bits are not
10581 // demanded.
10582 if (SimplifyDemandedBits(SDValue(N, 0)))
10583 return SDValue(N, 0);
10584
10585 if (N1C && !N1C->isOpaque())
10586 if (SDValue NewSRL = visitShiftByConstant(N))
10587 return NewSRL;
10588
10589 // Attempt to convert a srl of a load into a narrower zero-extending load.
10590 if (SDValue NarrowLoad = reduceLoadWidth(N))
10591 return NarrowLoad;
10592
10593 // Here is a common situation. We want to optimize:
10594 //
10595 // %a = ...
10596 // %b = and i32 %a, 2
10597 // %c = srl i32 %b, 1
10598 // brcond i32 %c ...
10599 //
10600 // into
10601 //
10602 // %a = ...
10603 // %b = and %a, 2
10604 // %c = setcc eq %b, 0
10605 // brcond %c ...
10606 //
10607 // However, after the source operand of SRL is optimized into AND, the SRL
10608 // itself may not be optimized further. Look for it and add the BRCOND into
10609 // the worklist.
10610 //
10611 // This also tends to happen for binary operations when SimplifyDemandedBits
10612 // is involved.
10613 //
10614 // FIXME: This is unnecessary if we process the DAG in topological order,
10615 // which we plan to do. This workaround can be removed once the DAG is
10616 // processed in topological order.
10617 if (N->hasOneUse()) {
10618 SDNode *Use = *N->use_begin();
10619
10620 // Look past the truncate.
10621 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10622 Use = *Use->use_begin();
10623
10624 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10625 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10626 AddToWorklist(Use);
10627 }
10628
10629 // Try to transform this shift into a multiply-high if
10630 // it matches the appropriate pattern detected in combineShiftToMULH.
10631 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10632 return MULH;
10633
10634 return SDValue();
10635}
10636
10637SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10638 EVT VT = N->getValueType(0);
10639 SDValue N0 = N->getOperand(0);
10640 SDValue N1 = N->getOperand(1);
10641 SDValue N2 = N->getOperand(2);
10642 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10643 unsigned BitWidth = VT.getScalarSizeInBits();
10644
10645 // fold (fshl N0, N1, 0) -> N0
10646 // fold (fshr N0, N1, 0) -> N1
10647 if (isPowerOf2_32(BitWidth))
10648 if (DAG.MaskedValueIsZero(
10649 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10650 return IsFSHL ? N0 : N1;
10651
10652 auto IsUndefOrZero = [](SDValue V) {
10653 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10654 };
10655
10656 // TODO - support non-uniform vector shift amounts.
10657 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10658 EVT ShAmtTy = N2.getValueType();
10659
10660 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10661 if (Cst->getAPIntValue().uge(BitWidth)) {
10662 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10663 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10664 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10665 }
10666
10667 unsigned ShAmt = Cst->getZExtValue();
10668 if (ShAmt == 0)
10669 return IsFSHL ? N0 : N1;
10670
10671 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10672 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10673 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10674 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
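// (Recall fshl(X, Y, C) == (X << C) | (Y >> (BW - C)) and
//  fshr(X, Y, C) == (X << (BW - C)) | (Y >> C) for 0 < C < BW, so a zero or
//  undef input leaves only a single shift.)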
10675 if (IsUndefOrZero(N0))
10676 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10677 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10678 SDLoc(N), ShAmtTy));
10679 if (IsUndefOrZero(N1))
10680 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10681 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10682 SDLoc(N), ShAmtTy));
10683
10684 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10685 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10686 // TODO - bigendian support once we have test coverage.
10687 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10688 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10689 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10690 !DAG.getDataLayout().isBigEndian()) {
10691 auto *LHS = dyn_cast<LoadSDNode>(N0);
10692 auto *RHS = dyn_cast<LoadSDNode>(N1);
10693 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10694 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10695 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10696 ISD::isNON_EXTLoad(LHS)) {
10697 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10698 SDLoc DL(RHS);
10699 uint64_t PtrOff =
10700 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10701 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10702 unsigned Fast = 0;
10703 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10704 RHS->getAddressSpace(), NewAlign,
10705 RHS->getMemOperand()->getFlags(), &Fast) &&
10706 Fast) {
10707 SDValue NewPtr = DAG.getMemBasePlusOffset(
10708 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10709 AddToWorklist(NewPtr.getNode());
10710 SDValue Load = DAG.getLoad(
10711 VT, DL, RHS->getChain(), NewPtr,
10712 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10713 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10714 // Replace the old load's chain with the new load's chain.
10715 WorklistRemover DeadNodes(*this);
10716 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10717 return Load;
10718 }
10719 }
10720 }
10721 }
10722 }
10723
10724 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10725 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10726 // iff we know the shift amount is in range.
10727 // TODO: when is it worth doing SUB(BW, N2) as well?
10728 if (isPowerOf2_32(BitWidth)) {
10729 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10730 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10731 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10732 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10733 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10734 }
10735
10736 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10737 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10738 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
10739 // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
10740 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10741 if (N0 == N1 && hasOperation(RotOpc, VT))
10742 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10743
10744 // Simplify, based on bits shifted out of N0/N1.
10745 if (SimplifyDemandedBits(SDValue(N, 0)))
10746 return SDValue(N, 0);
10747
10748 return SDValue();
10749}
10750
10751SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10752 SDValue N0 = N->getOperand(0);
10753 SDValue N1 = N->getOperand(1);
10754 if (SDValue V = DAG.simplifyShift(N0, N1))
10755 return V;
10756
10757 SDLoc DL(N);
10758 EVT VT = N0.getValueType();
10759
10760 // fold (*shlsat c1, c2) -> c1<<c2
10761 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10762 return C;
10763
10764 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10765
10766 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10767 // fold (sshlsat x, c) -> (shl x, c)
10768 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10769 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10770 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10771
10772 // fold (ushlsat x, c) -> (shl x, c)
10773 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10774 N1C->getAPIntValue().ule(
10775 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10776 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10777 }
10778
10779 return SDValue();
10780}
10781
10782 // Given an ABS node, detect the following patterns:
10783// (ABS (SUB (EXTEND a), (EXTEND b))).
10784// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10785// Generates UABD/SABD instruction.
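// For example, abs((sext i8 %a to i32) - (sext i8 %b to i32)) is the absolute
// difference of %a and %b and can become a single ABDS node on targets that
// provide it; the zext form maps to ABDU instead.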
10786SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10787 EVT SrcVT = N->getValueType(0);
10788
10789 if (N->getOpcode() == ISD::TRUNCATE)
10790 N = N->getOperand(0).getNode();
10791
10792 if (N->getOpcode() != ISD::ABS)
10793 return SDValue();
10794
10795 EVT VT = N->getValueType(0);
10796 SDValue AbsOp1 = N->getOperand(0);
10797 SDValue Op0, Op1;
10798
10799 if (AbsOp1.getOpcode() != ISD::SUB)
10800 return SDValue();
10801
10802 Op0 = AbsOp1.getOperand(0);
10803 Op1 = AbsOp1.getOperand(1);
10804
10805 unsigned Opc0 = Op0.getOpcode();
10806
10807 // Check if the operands of the sub are (zero|sign)-extended.
10808 // TODO: Should we use ValueTracking instead?
10809 if (Opc0 != Op1.getOpcode() ||
10810 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10811 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10812 // fold (abs (sub nsw x, y)) -> abds(x, y)
10813 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10814 TLI.preferABDSToABSWithNSW(VT)) {
10815 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10816 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10817 }
10818 return SDValue();
10819 }
10820
10821 EVT VT0, VT1;
10822 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10823 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10824 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10825 } else {
10826 VT0 = Op0.getOperand(0).getValueType();
10827 VT1 = Op1.getOperand(0).getValueType();
10828 }
10829 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10830
10831 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10832 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10833 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10834 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10835 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10836 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10837 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10838 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10839 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10840 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10841 }
10842
10843 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10844 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10845 if (hasOperation(ABDOpcode, VT)) {
10846 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10847 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10848 }
10849
10850 return SDValue();
10851}
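// A standalone sketch (plain C++, not part of this file; abds8 is an
// illustrative helper, not an LLVM API) of the equivalence behind foldABSToABD:
// the absolute difference of two sign-extended narrow values always fits in the
// narrow type, so abs(sext(a) - sext(b)) equals zext(abds(a, b)).
#include <cassert>
#include <cstdint>
#include <cstdlib>

static uint8_t abds8(int8_t A, int8_t B) { // signed absolute difference on i8
  int Diff = int(A) - int(B);              // no overflow once widened to int
  return uint8_t(Diff < 0 ? -Diff : Diff);
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      int32_t Wide = std::abs(int32_t(A) - int32_t(B)); // abs(sext(a) - sext(b))
      assert(uint32_t(Wide) == uint32_t(abds8(int8_t(A), int8_t(B))));
    }
  return 0;
}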
10852
10853SDValue DAGCombiner::visitABS(SDNode *N) {
10854 SDValue N0 = N->getOperand(0);
10855 EVT VT = N->getValueType(0);
10856 SDLoc DL(N);
10857
10858 // fold (abs c1) -> c2
10859 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10860 return C;
10861 // fold (abs (abs x)) -> (abs x)
10862 if (N0.getOpcode() == ISD::ABS)
10863 return N0;
10864 // fold (abs x) -> x iff not-negative
10865 if (DAG.SignBitIsZero(N0))
10866 return N0;
10867
10868 if (SDValue ABD = foldABSToABD(N, DL))
10869 return ABD;
10870
10871 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10872 // iff zero_extend/truncate are free.
10873 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10874 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10875 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10876 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10877 hasOperation(ISD::ABS, ExtVT)) {
10878 return DAG.getNode(
10879 ISD::ZERO_EXTEND, DL, VT,
10880 DAG.getNode(ISD::ABS, DL, ExtVT,
10881 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10882 }
10883 }
10884
10885 return SDValue();
10886}
10887
10888SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10889 SDValue N0 = N->getOperand(0);
10890 EVT VT = N->getValueType(0);
10891 SDLoc DL(N);
10892
10893 // fold (bswap c1) -> c2
10894 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
10895 return C;
10896 // fold (bswap (bswap x)) -> x
10897 if (N0.getOpcode() == ISD::BSWAP)
10898 return N0.getOperand(0);
10899
10900 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
10901 // isn't supported, it will be expanded to bswap followed by a manual reversal
10902 // of bits in each byte. By placing bswaps before bitreverse, we can remove
10903 // the two bswaps if the bitreverse gets expanded.
10904 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
10905 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10906 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
10907 }
10908
10909 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
10910 // iff c >= bw/2 (i.e. the lower half is known zero)
10911 unsigned BW = VT.getScalarSizeInBits();
10912 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
10913 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10914 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
10915 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10916 ShAmt->getZExtValue() >= (BW / 2) &&
10917 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
10918 TLI.isTruncateFree(VT, HalfVT) &&
10919 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
10920 SDValue Res = N0.getOperand(0);
10921 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
10922 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
10923 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
10924 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
10925 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
10926 return DAG.getZExtOrTrunc(Res, DL, VT);
10927 }
10928 }
10929
10930 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
10931 // inverse-shift-of-bswap:
10932 // bswap (X u<< C) --> (bswap X) u>> C
10933 // bswap (X u>> C) --> (bswap X) u<< C
10934 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10935 N0.hasOneUse()) {
10936 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10937 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10938 ShAmt->getZExtValue() % 8 == 0) {
10939 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10940 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
10941 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
10942 }
10943 }
10944
10945 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
10946 return V;
10947
10948 return SDValue();
10949}
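// A standalone sketch (plain C++, not part of this file; bswap32 is an
// illustrative helper) of the "bswap (X u<< C) --> (bswap X) u>> C"
// canonicalization above, which holds whenever C is a multiple of 8 because
// bswap only permutes whole bytes.
#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xff00u) | ((X << 8) & 0xff0000u) | (X << 24);
}

int main() {
  uint32_t X = 0xA1B2C3D4u;
  for (unsigned C = 0; C < 32; C += 8)
    assert(bswap32(X << C) == (bswap32(X) >> C));
  return 0;
}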
10950
10951SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
10952 SDValue N0 = N->getOperand(0);
10953 EVT VT = N->getValueType(0);
10954 SDLoc DL(N);
10955
10956 // fold (bitreverse c1) -> c2
10957 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
10958 return C;
10959 // fold (bitreverse (bitreverse x)) -> x
10960 if (N0.getOpcode() == ISD::BITREVERSE)
10961 return N0.getOperand(0);
10962 return SDValue();
10963}
10964
10965SDValue DAGCombiner::visitCTLZ(SDNode *N) {
10966 SDValue N0 = N->getOperand(0);
10967 EVT VT = N->getValueType(0);
10968 SDLoc DL(N);
10969
10970 // fold (ctlz c1) -> c2
10971 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
10972 return C;
10973
10974 // If the value is known never to be zero, switch to the undef version.
10975 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
10976 if (DAG.isKnownNeverZero(N0))
10977 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
10978
10979 return SDValue();
10980}
10981
10982SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
10983 SDValue N0 = N->getOperand(0);
10984 EVT VT = N->getValueType(0);
10985 SDLoc DL(N);
10986
10987 // fold (ctlz_zero_undef c1) -> c2
10988 if (SDValue C =
10989 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
10990 return C;
10991 return SDValue();
10992}
10993
10994SDValue DAGCombiner::visitCTTZ(SDNode *N) {
10995 SDValue N0 = N->getOperand(0);
10996 EVT VT = N->getValueType(0);
10997 SDLoc DL(N);
10998
10999 // fold (cttz c1) -> c2
11000 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11001 return C;
11002
11003 // If the value is known never to be zero, switch to the undef version.
11004 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11005 if (DAG.isKnownNeverZero(N0))
11006 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11007
11008 return SDValue();
11009}
11010
11011SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11012 SDValue N0 = N->getOperand(0);
11013 EVT VT = N->getValueType(0);
11014 SDLoc DL(N);
11015
11016 // fold (cttz_zero_undef c1) -> c2
11017 if (SDValue C =
11018 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11019 return C;
11020 return SDValue();
11021}
11022
11023SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11024 SDValue N0 = N->getOperand(0);
11025 EVT VT = N->getValueType(0);
11026 unsigned NumBits = VT.getScalarSizeInBits();
11027 SDLoc DL(N);
11028
11029 // fold (ctpop c1) -> c2
11030 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11031 return C;
11032
11033 // If the source is being shifted, but doesn't affect any active bits,
11034 // then we can call CTPOP on the shift source directly.
11035 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11036 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11037 const APInt &Amt = AmtC->getAPIntValue();
11038 if (Amt.ult(NumBits)) {
11039 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11040 if ((N0.getOpcode() == ISD::SRL &&
11041 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11042 (N0.getOpcode() == ISD::SHL &&
11043 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11044 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11045 }
11046 }
11047 }
11048 }
11049
11050 // If the upper bits are known to be zero, then see if it's profitable to
11051 // only count the lower bits.
11052 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11053 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11054 if (hasOperation(ISD::CTPOP, HalfVT) &&
11055 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11056 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11057 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11058 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11059 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11060 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11061 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11062 }
11063 }
11064 }
11065
11066 return SDValue();
11067}
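// A standalone sketch (plain C++, not part of this file; the popcount helpers
// are illustrative only) of the CTPOP narrowing above: when the upper half of
// the value is known to be zero, the population count of the truncated lower
// half is the same.
#include <cassert>
#include <cstdint>

static unsigned popcount16(uint16_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1) // clear the lowest set bit each iteration
    ++N;
  return N;
}

static unsigned popcount32(uint32_t X) {
  return popcount16(uint16_t(X)) + popcount16(uint16_t(X >> 16));
}

int main() {
  uint32_t X = 0x0000BEEFu; // upper 16 bits are zero
  assert(popcount32(X) == popcount16(uint16_t(X)));
  return 0;
}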
11068
11069// FIXME: This should be checking for no signed zeros on individual operands, as
11070 // well as no nans.
11071 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11072 SDValue RHS,
11073 const TargetLowering &TLI) {
11074 const TargetOptions &Options = DAG.getTarget().Options;
11075 EVT VT = LHS.getValueType();
11076
11077 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11078 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11079 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
11080 }
11081
11082 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11083 SDValue RHS, SDValue True, SDValue False,
11084 ISD::CondCode CC,
11085 const TargetLowering &TLI,
11086 SelectionDAG &DAG) {
11087 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11088 switch (CC) {
11089 case ISD::SETOLT:
11090 case ISD::SETOLE:
11091 case ISD::SETLT:
11092 case ISD::SETLE:
11093 case ISD::SETULT:
11094 case ISD::SETULE: {
11095 // Since the operands are already known never to be NaN here, either fminnum
11096 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11097 // expanded in terms of it.
11098 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11099 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11100 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11101
11102 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11103 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11104 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11105 return SDValue();
11106 }
11107 case ISD::SETOGT:
11108 case ISD::SETOGE:
11109 case ISD::SETGT:
11110 case ISD::SETGE:
11111 case ISD::SETUGT:
11112 case ISD::SETUGE: {
11113 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11114 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11115 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11116
11117 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11118 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11119 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11120 return SDValue();
11121 }
11122 default:
11123 return SDValue();
11124 }
11125}
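// A standalone sketch (plain C++, not part of this file) of the select-to-min/max
// rewrite above: for operands that are not NaN (and ignoring signed zeros, as
// the NoSignedZerosFPMath guard requires), "x < y ? x : y" behaves like fminnum
// and "x > y ? x : y" behaves like fmaxnum.
#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {-2.5, -1.0, 0.0, 1.0, 3.5};
  for (double X : Vals)
    for (double Y : Vals) {
      assert((X < Y ? X : Y) == std::fmin(X, Y));
      assert((X > Y ? X : Y) == std::fmax(X, Y));
    }
  return 0;
}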
11126
11127/// Generate Min/Max node
11128SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11129 SDValue RHS, SDValue True,
11130 SDValue False, ISD::CondCode CC) {
11131 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11132 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11133
11134 // If we can't directly match this, try to see if we can pull an fneg out of
11135 // the select.
11136 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11137 True, DAG, LegalOperations, ForCodeSize);
11138 if (!NegTrue)
11139 return SDValue();
11140
11141 HandleSDNode NegTrueHandle(NegTrue);
11142
11143 // Try to unfold an fneg from the select if we are comparing the negated
11144 // constant.
11145 //
11146 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11147 //
11148 // TODO: Handle fabs
11149 if (LHS == NegTrue) {
11150 // If we can't directly match this, try to see if we can pull an fneg out of
11151 // the select.
11152 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11153 RHS, DAG, LegalOperations, ForCodeSize);
11154 if (NegRHS) {
11155 HandleSDNode NegRHSHandle(NegRHS);
11156 if (NegRHS == False) {
11157 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11158 False, CC, TLI, DAG);
11159 if (Combined)
11160 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11161 }
11162 }
11163 }
11164
11165 return SDValue();
11166}
11167
11168/// If a (v)select has a condition value that is a sign-bit test, try to smear
11169 /// the condition operand sign-bit across the value width and use it as a mask.
11170 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11171 SelectionDAG &DAG) {
11172 SDValue Cond = N->getOperand(0);
11173 SDValue C1 = N->getOperand(1);
11174 SDValue C2 = N->getOperand(2);
11175 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11176 return SDValue();
11177
11178 EVT VT = N->getValueType(0);
11179 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11180 VT != Cond.getOperand(0).getValueType())
11181 return SDValue();
11182
11183 // The inverted-condition + commuted-select variants of these patterns are
11184 // canonicalized to these forms in IR.
11185 SDValue X = Cond.getOperand(0);
11186 SDValue CondC = Cond.getOperand(1);
11187 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11188 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11189 isAllOnesOrAllOnesSplat(C2)) {
11190 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11191 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11192 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11193 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11194 }
11195 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11196 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11197 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11198 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11199 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11200 }
11201 return SDValue();
11202}
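// A standalone sketch (plain C++, not part of this file) of the sign-bit smear
// used above: an arithmetic shift right by BW-1 turns the sign bit into an
// all-zeros or all-ones mask, so the two constant-select forms reduce to OR/AND.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t C1 = 42;
  for (int64_t V = -3; V <= 3; ++V) {
    int32_t X = int32_t(V);
    int32_t Mask = X >> 31; // 0 if X >= 0, -1 if X < 0 (arithmetic shift)
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    assert((X > -1 ? C1 : -1) == (Mask | C1));
    // i32 X < 0 ? C1 : 0 --> (X >>s 31) & C1
    assert((X < 0 ? C1 : 0) == (Mask & C1));
  }
  return 0;
}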
11203
11204 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11205 const TargetLowering &TLI) {
11206 if (!TLI.convertSelectOfConstantsToMath(VT))
11207 return false;
11208
11209 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11210 return true;
11211 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11212 return true;
11213
11214 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11215 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11216 return true;
11217 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11218 return true;
11219
11220 return false;
11221}
11222
11223SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11224 SDValue Cond = N->getOperand(0);
11225 SDValue N1 = N->getOperand(1);
11226 SDValue N2 = N->getOperand(2);
11227 EVT VT = N->getValueType(0);
11228 EVT CondVT = Cond.getValueType();
11229 SDLoc DL(N);
11230
11231 if (!VT.isInteger())
11232 return SDValue();
11233
11234 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11235 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11236 if (!C1 || !C2)
11237 return SDValue();
11238
11239 if (CondVT != MVT::i1 || LegalOperations) {
11240 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11241 // We can't do this reliably if integer-based booleans have different contents
11242 // from floating-point-based booleans. This is because we can't tell whether we
11243 // have an integer-based boolean or a floating-point-based boolean unless we
11244 // can find the SETCC that produced it and inspect its operands. This is
11245 // fairly easy if C is the SETCC node, but it can potentially be
11246 // undiscoverable (or not reasonably discoverable). For example, it could be
11247 // in another basic block or it could require searching a complicated
11248 // expression.
11249 if (CondVT.isInteger() &&
11250 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11251 TargetLowering::ZeroOrOneBooleanContent &&
11252 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11253 TargetLowering::ZeroOrOneBooleanContent &&
11254 C1->isZero() && C2->isOne()) {
11255 SDValue NotCond =
11256 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11257 if (VT.bitsEq(CondVT))
11258 return NotCond;
11259 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11260 }
11261
11262 return SDValue();
11263 }
11264
11265 // Only do this before legalization to avoid conflicting with target-specific
11266 // transforms in the other direction (create a select from a zext/sext). There
11267 // is also a target-independent combine here in DAGCombiner in the other
11268 // direction for (select Cond, -1, 0) when the condition is not i1.
11269 assert(CondVT == MVT::i1 && !LegalOperations);
11270
11271 // select Cond, 1, 0 --> zext (Cond)
11272 if (C1->isOne() && C2->isZero())
11273 return DAG.getZExtOrTrunc(Cond, DL, VT);
11274
11275 // select Cond, -1, 0 --> sext (Cond)
11276 if (C1->isAllOnes() && C2->isZero())
11277 return DAG.getSExtOrTrunc(Cond, DL, VT);
11278
11279 // select Cond, 0, 1 --> zext (!Cond)
11280 if (C1->isZero() && C2->isOne()) {
11281 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11282 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11283 return NotCond;
11284 }
11285
11286 // select Cond, 0, -1 --> sext (!Cond)
11287 if (C1->isZero() && C2->isAllOnes()) {
11288 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11289 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11290 return NotCond;
11291 }
11292
11293 // Use a target hook because some targets may prefer to transform in the
11294 // other direction.
11295 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11296 return SDValue();
11297
11298 // For any constants that differ by 1, we can transform the select into
11299 // an extend and add.
11300 const APInt &C1Val = C1->getAPIntValue();
11301 const APInt &C2Val = C2->getAPIntValue();
11302
11303 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11304 if (C1Val - 1 == C2Val) {
11305 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11306 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11307 }
11308
11309 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11310 if (C1Val + 1 == C2Val) {
11311 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11312 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11313 }
11314
11315 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11316 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11317 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11318 SDValue ShAmtC =
11319 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11320 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11321 }
11322
11323 // select Cond, -1, C --> or (sext Cond), C
11324 if (C1->isAllOnes()) {
11325 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11326 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11327 }
11328
11329 // select Cond, C, -1 --> or (sext (not Cond)), C
11330 if (C2->isAllOnes()) {
11331 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11332 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11333 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11334 }
11335
11336 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11337 return V;
11338
11339 return SDValue();
11340}
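// A standalone sketch (plain C++, not part of this file) of two of the
// constant-select folds above: constants that differ by one become an extend
// plus add, and a power-of-two versus zero select becomes a zero-extend and shift.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 100, Pow2 = 16; // Pow2 == 1 << 4
  for (uint32_t Cond = 0; Cond <= 1; ++Cond) {
    // select Cond, C1, C1-1 --> add (zext Cond), C1-1
    assert((Cond ? C1 : C1 - 1) == Cond + (C1 - 1));
    // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
    assert((Cond ? Pow2 : 0u) == (Cond << 4));
  }
  return 0;
}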
11341
11342template <class MatchContextClass>
11343 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11344 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11345 N->getOpcode() == ISD::VP_SELECT) &&
11346 "Expected a (v)(vp.)select");
11347 SDValue Cond = N->getOperand(0);
11348 SDValue T = N->getOperand(1), F = N->getOperand(2);
11349 EVT VT = N->getValueType(0);
11350 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11351 MatchContextClass matcher(DAG, TLI, N);
11352
11353 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11354 return SDValue();
11355
11356 // select Cond, Cond, F --> or Cond, F
11357 // select Cond, 1, F --> or Cond, F
11358 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11359 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11360
11361 // select Cond, T, Cond --> and Cond, T
11362 // select Cond, T, 0 --> and Cond, T
11363 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11364 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11365
11366 // select Cond, T, 1 --> or (not Cond), T
11367 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11368 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11369 DAG.getAllOnesConstant(SDLoc(N), VT));
11370 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11371 }
11372
11373 // select Cond, 0, F --> and (not Cond), F
11374 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11375 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11376 DAG.getAllOnesConstant(SDLoc(N), VT));
11377 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11378 }
11379
11380 return SDValue();
11381}
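// A standalone sketch (plain C++, not part of this file) of the i1 select folds
// above: when the condition and both operands are single-bit values, the select
// collapses to plain boolean logic.
#include <cassert>

int main() {
  for (int Cond = 0; Cond <= 1; ++Cond)
    for (int T = 0; T <= 1; ++T)
      for (int F = 0; F <= 1; ++F) {
        assert((Cond ? 1 : F) == (Cond | F));       // select Cond, 1, F --> or Cond, F
        assert((Cond ? T : 0) == (Cond & T));       // select Cond, T, 0 --> and Cond, T
        assert((Cond ? T : 1) == ((Cond ^ 1) | T)); // select Cond, T, 1 --> or (not Cond), T
        assert((Cond ? 0 : F) == ((Cond ^ 1) & F)); // select Cond, 0, F --> and (not Cond), F
      }
  return 0;
}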
11382
11383 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11384 SDValue N0 = N->getOperand(0);
11385 SDValue N1 = N->getOperand(1);
11386 SDValue N2 = N->getOperand(2);
11387 EVT VT = N->getValueType(0);
11388 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11389 return SDValue();
11390
11391 SDValue Cond0 = N0.getOperand(0);
11392 SDValue Cond1 = N0.getOperand(1);
11393 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11394 if (VT != Cond0.getValueType())
11395 return SDValue();
11396
11397 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11398 // compare is inverted from that pattern ("Cond0 s> -1").
11399 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11400 ; // This is the pattern we are looking for.
11401 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11402 std::swap(N1, N2);
11403 else
11404 return SDValue();
11405
11406 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11407 if (isNullOrNullSplat(N2)) {
11408 SDLoc DL(N);
11409 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11410 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11411 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11412 }
11413
11414 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11415 if (isAllOnesOrAllOnesSplat(N1)) {
11416 SDLoc DL(N);
11417 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11418 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11419 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11420 }
11421
11422 // If we have to invert the sign bit mask, only do that transform if the
11423 // target has a bitwise 'and not' instruction (the invert is free).
11424 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11425 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11426 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11427 SDLoc DL(N);
11428 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11429 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11430 SDValue Not = DAG.getNOT(DL, Sra, VT);
11431 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11432 }
11433
11434 // TODO: There's another pattern in this family, but it may require
11435 // implementing hasOrNot() to check for profitability:
11436 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11437
11438 return SDValue();
11439}
11440
11441SDValue DAGCombiner::visitSELECT(SDNode *N) {
11442 SDValue N0 = N->getOperand(0);
11443 SDValue N1 = N->getOperand(1);
11444 SDValue N2 = N->getOperand(2);
11445 EVT VT = N->getValueType(0);
11446 EVT VT0 = N0.getValueType();
11447 SDLoc DL(N);
11448 SDNodeFlags Flags = N->getFlags();
11449
11450 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11451 return V;
11452
11453 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11454 return V;
11455
11456 // select (not Cond), N1, N2 -> select Cond, N2, N1
11457 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11458 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11459 SelectOp->setFlags(Flags);
11460 return SelectOp;
11461 }
11462
11463 if (SDValue V = foldSelectOfConstants(N))
11464 return V;
11465
11466 // If we can fold this based on the true/false value, do so.
11467 if (SimplifySelectOps(N, N1, N2))
11468 return SDValue(N, 0); // Don't revisit N.
11469
11470 if (VT0 == MVT::i1) {
11471 // The code in this block deals with the following 2 equivalences:
11472 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11473 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11474 // The target can specify its preferred form with the
11475 // shouldNormalizeToSelectSequence() callback. However, we always transform
11476 // to the right-hand form if the inner select already exists in the DAG,
11477 // and we always transform to the left-hand side if we know that we can
11478 // further optimize the combination of the conditions.
11479 bool normalizeToSequence =
11480 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11481 // select (and Cond0, Cond1), X, Y
11482 // -> select Cond0, (select Cond1, X, Y), Y
11483 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11484 SDValue Cond0 = N0->getOperand(0);
11485 SDValue Cond1 = N0->getOperand(1);
11486 SDValue InnerSelect =
11487 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11488 if (normalizeToSequence || !InnerSelect.use_empty())
11489 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11490 InnerSelect, N2, Flags);
11491 // Cleanup on failure.
11492 if (InnerSelect.use_empty())
11493 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11494 }
11495 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11496 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11497 SDValue Cond0 = N0->getOperand(0);
11498 SDValue Cond1 = N0->getOperand(1);
11499 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11500 Cond1, N1, N2, Flags);
11501 if (normalizeToSequence || !InnerSelect.use_empty())
11502 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11503 InnerSelect, Flags);
11504 // Cleanup on failure.
11505 if (InnerSelect.use_empty())
11506 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11507 }
11508
11509 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11510 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11511 SDValue N1_0 = N1->getOperand(0);
11512 SDValue N1_1 = N1->getOperand(1);
11513 SDValue N1_2 = N1->getOperand(2);
11514 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11515 // Create the actual and node if we can generate good code for it.
11516 if (!normalizeToSequence) {
11517 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11518 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11519 N2, Flags);
11520 }
11521 // Otherwise see if we can optimize the "and" to a better pattern.
11522 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11523 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11524 N2, Flags);
11525 }
11526 }
11527 }
11528 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11529 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11530 SDValue N2_0 = N2->getOperand(0);
11531 SDValue N2_1 = N2->getOperand(1);
11532 SDValue N2_2 = N2->getOperand(2);
11533 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11534 // Create the actual or node if we can generate good code for it.
11535 if (!normalizeToSequence) {
11536 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11537 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11538 N2_2, Flags);
11539 }
11540 // Otherwise see if we can optimize to a better pattern.
11541 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11542 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11543 N2_2, Flags);
11544 }
11545 }
11546 }
11547
11548 // Fold selects based on a setcc into other things, such as min/max/abs.
11549 if (N0.getOpcode() == ISD::SETCC) {
11550 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11551 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11552
11553 // select (fcmp lt x, y), x, y -> fminnum x, y
11554 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11555 //
11556 // This is OK if we don't care what happens if either operand is a NaN.
11557 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11558 if (SDValue FMinMax =
11559 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11560 return FMinMax;
11561
11562 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11563 // This is conservatively limited to pre-legal-operations to give targets
11564 // a chance to reverse the transform if they want to do that. Also, it is
11565 // unlikely that the pattern would be formed late, so it's probably not
11566 // worth going through the other checks.
11567 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11568 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11569 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11570 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11571 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11572 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11573 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11574 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11575 //
11576 // The IR equivalent of this transform would have this form:
11577 // %a = add %x, C
11578 // %c = icmp ugt %x, ~C
11579 // %r = select %c, -1, %a
11580 // =>
11581 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11582 // %u0 = extractvalue %u, 0
11583 // %u1 = extractvalue %u, 1
11584 // %r = select %u1, -1, %u0
11585 SDVTList VTs = DAG.getVTList(VT, VT0);
11586 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11587 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11588 }
11589 }
11590
11591 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11592 (!LegalOperations &&
11593 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11594 // Any flags available in a select/setcc fold will be on the setcc as they
11595 // migrated from fcmp
11596 Flags = N0->getFlags();
11597 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11598 N2, N0.getOperand(2));
11599 SelectNode->setFlags(Flags);
11600 return SelectNode;
11601 }
11602
11603 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11604 return NewSel;
11605 }
11606
11607 if (!VT.isVector())
11608 if (SDValue BinOp = foldSelectOfBinops(N))
11609 return BinOp;
11610
11611 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11612 return R;
11613
11614 return SDValue();
11615}
11616
11617// This function assumes all the vselect's arguments are CONCAT_VECTOR
11618// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11619 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11620 SDLoc DL(N);
11621 SDValue Cond = N->getOperand(0);
11622 SDValue LHS = N->getOperand(1);
11623 SDValue RHS = N->getOperand(2);
11624 EVT VT = N->getValueType(0);
11625 int NumElems = VT.getVectorNumElements();
11626 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11627 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11628 Cond.getOpcode() == ISD::BUILD_VECTOR);
11629
11630 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
11631 // binary ones here.
11632 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11633 return SDValue();
11634
11635 // We're sure we have an even number of elements due to the
11636 // concat_vectors we have as arguments to vselect.
11637 // Skip BV elements until we find one that's not an UNDEF
11638 // After we find an UNDEF element, keep looping until we get to half the
11639 // length of the BV and see if all the non-undef nodes are the same.
11640 ConstantSDNode *BottomHalf = nullptr;
11641 for (int i = 0; i < NumElems / 2; ++i) {
11642 if (Cond->getOperand(i)->isUndef())
11643 continue;
11644
11645 if (BottomHalf == nullptr)
11646 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11647 else if (Cond->getOperand(i).getNode() != BottomHalf)
11648 return SDValue();
11649 }
11650
11651 // Do the same for the second half of the BuildVector
11652 ConstantSDNode *TopHalf = nullptr;
11653 for (int i = NumElems / 2; i < NumElems; ++i) {
11654 if (Cond->getOperand(i)->isUndef())
11655 continue;
11656
11657 if (TopHalf == nullptr)
11658 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11659 else if (Cond->getOperand(i).getNode() != TopHalf)
11660 return SDValue();
11661 }
11662
11663 assert(TopHalf && BottomHalf &&
11664 "One half of the selector was all UNDEFs and the other was all the "
11665 "same value. This should have been addressed before this function.");
11666 return DAG.getNode(
11667 ISD::CONCAT_VECTORS, DL, VT,
11668 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11669 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11670}
11671
11672bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11673 SelectionDAG &DAG, const SDLoc &DL) {
11674
11675 // Only perform the transformation when existing operands can be reused.
11676 if (IndexIsScaled)
11677 return false;
11678
11679 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11680 return false;
11681
11682 EVT VT = BasePtr.getValueType();
11683
11684 if (SDValue SplatVal = DAG.getSplatValue(Index);
11685 SplatVal && !isNullConstant(SplatVal) &&
11686 SplatVal.getValueType() == VT) {
11687 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11688 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11689 return true;
11690 }
11691
11692 if (Index.getOpcode() != ISD::ADD)
11693 return false;
11694
11695 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11696 SplatVal && SplatVal.getValueType() == VT) {
11697 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11698 Index = Index.getOperand(1);
11699 return true;
11700 }
11701 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11702 SplatVal && SplatVal.getValueType() == VT) {
11703 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11704 Index = Index.getOperand(0);
11705 return true;
11706 }
11707 return false;
11708}
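// A standalone sketch (plain C++, not part of this file) of the uniform-base
// refinement above: adding a splatted (per-lane uniform) value to every
// gather/scatter index addresses the same memory as folding that value into the
// scalar base pointer once.
#include <cassert>
#include <cstddef>

int main() {
  int Data[16];
  for (int I = 0; I < 16; ++I)
    Data[I] = I * 10;

  const int *Base = Data;
  const std::ptrdiff_t Splat = 4;               // uniform addend present in every lane
  const std::ptrdiff_t Index[4] = {0, 2, 5, 7}; // per-lane indices

  for (int Lane = 0; Lane < 4; ++Lane) {
    int A = Base[Splat + Index[Lane]];          // original: index = splat + per-lane index
    int B = (Base + Splat)[Index[Lane]];        // refined: base absorbs the splat
    assert(A == B);
  }
  return 0;
}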
11709
11710// Fold sext/zext of index into index type.
11711 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11712 SelectionDAG &DAG) {
11713 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11714
11715 // It's always safe to look through zero extends.
11716 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11717 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11718 IndexType = ISD::UNSIGNED_SCALED;
11719 Index = Index.getOperand(0);
11720 return true;
11721 }
11722 if (ISD::isIndexTypeSigned(IndexType)) {
11723 IndexType = ISD::UNSIGNED_SCALED;
11724 return true;
11725 }
11726 }
11727
11728 // It's only safe to look through sign extends when Index is signed.
11729 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11730 ISD::isIndexTypeSigned(IndexType) &&
11731 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11732 Index = Index.getOperand(0);
11733 return true;
11734 }
11735
11736 return false;
11737}
11738
11739SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11740 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11741 SDValue Mask = MSC->getMask();
11742 SDValue Chain = MSC->getChain();
11743 SDValue Index = MSC->getIndex();
11744 SDValue Scale = MSC->getScale();
11745 SDValue StoreVal = MSC->getValue();
11746 SDValue BasePtr = MSC->getBasePtr();
11747 SDValue VL = MSC->getVectorLength();
11748 ISD::MemIndexType IndexType = MSC->getIndexType();
11749 SDLoc DL(N);
11750
11751 // Zap scatters with a zero mask.
11752 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11753 return Chain;
11754
11755 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11756 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11757 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11758 DL, Ops, MSC->getMemOperand(), IndexType);
11759 }
11760
11761 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11762 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11763 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11764 DL, Ops, MSC->getMemOperand(), IndexType);
11765 }
11766
11767 return SDValue();
11768}
11769
11770SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11771 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11772 SDValue Mask = MSC->getMask();
11773 SDValue Chain = MSC->getChain();
11774 SDValue Index = MSC->getIndex();
11775 SDValue Scale = MSC->getScale();
11776 SDValue StoreVal = MSC->getValue();
11777 SDValue BasePtr = MSC->getBasePtr();
11778 ISD::MemIndexType IndexType = MSC->getIndexType();
11779 SDLoc DL(N);
11780
11781 // Zap scatters with a zero mask.
11782 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11783 return Chain;
11784
11785 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11786 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11787 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11788 DL, Ops, MSC->getMemOperand(), IndexType,
11789 MSC->isTruncatingStore());
11790 }
11791
11792 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11793 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11794 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11795 DL, Ops, MSC->getMemOperand(), IndexType,
11796 MSC->isTruncatingStore());
11797 }
11798
11799 return SDValue();
11800}
11801
11802SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11803 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11804 SDValue Mask = MST->getMask();
11805 SDValue Chain = MST->getChain();
11806 SDValue Value = MST->getValue();
11807 SDValue Ptr = MST->getBasePtr();
11808 SDLoc DL(N);
11809
11810 // Zap masked stores with a zero mask.
11811 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11812 return Chain;
11813
11814 // Remove a masked store if base pointers and masks are equal.
11815 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11816 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11817 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11818 !MST->getBasePtr().isUndef() &&
11819 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11820 MST1->getMemoryVT().getStoreSize()) ||
11821 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11822 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11823 MST->getMemoryVT().getStoreSize())) {
11824 CombineTo(MST1, MST1->getChain());
11825 if (N->getOpcode() != ISD::DELETED_NODE)
11826 AddToWorklist(N);
11827 return SDValue(N, 0);
11828 }
11829 }
11830
11831 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11832 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11833 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11834 !MST->isCompressingStore() && !MST->isTruncatingStore())
11835 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11836 MST->getBasePtr(), MST->getPointerInfo(),
11837 MST->getOriginalAlign(),
11838 MST->getMemOperand()->getFlags(), MST->getAAInfo());
11839
11840 // Try transforming N to an indexed store.
11841 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11842 return SDValue(N, 0);
11843
11844 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11845 Value.getValueType().isInteger() &&
11846 (!isa<ConstantSDNode>(Value) ||
11847 !cast<ConstantSDNode>(Value)->isOpaque())) {
11848 APInt TruncDemandedBits =
11849 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11850 MST->getMemoryVT().getScalarSizeInBits());
11851
11852 // See if we can simplify the operation with
11853 // SimplifyDemandedBits, which only works if the value has a single use.
11854 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11855 // Re-visit the store if anything changed and the store hasn't been merged
11856 // with another node (N is deleted) SimplifyDemandedBits will add Value's
11857 // node back to the worklist if necessary, but we also need to re-visit
11858 // the Store node itself.
11859 if (N->getOpcode() != ISD::DELETED_NODE)
11860 AddToWorklist(N);
11861 return SDValue(N, 0);
11862 }
11863 }
11864
11865 // If this is a TRUNC followed by a masked store, fold this into a masked
11866 // truncating store. We can do this even if this is already a masked
11867 // truncstore.
11868 // TODO: Try combining to a masked compress store if possible.
11869 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11870 MST->isUnindexed() && !MST->isCompressingStore() &&
11871 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11872 MST->getMemoryVT(), LegalOperations)) {
11873 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11874 Value.getOperand(0).getValueType());
11875 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11876 MST->getOffset(), Mask, MST->getMemoryVT(),
11877 MST->getMemOperand(), MST->getAddressingMode(),
11878 /*IsTruncating=*/true);
11879 }
11880
11881 return SDValue();
11882}
11883
11884SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11885 auto *SST = cast<VPStridedStoreSDNode>(N);
11886 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11887 // Combine strided stores with unit-stride to a regular VP store.
11888 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11889 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11890 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11891 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11892 SST->getVectorLength(), SST->getMemoryVT(),
11893 SST->getMemOperand(), SST->getAddressingMode(),
11894 SST->isTruncatingStore(), SST->isCompressingStore());
11895 }
11896 return SDValue();
11897}
11898
11899SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11900 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11901 SDValue Mask = MGT->getMask();
11902 SDValue Chain = MGT->getChain();
11903 SDValue Index = MGT->getIndex();
11904 SDValue Scale = MGT->getScale();
11905 SDValue BasePtr = MGT->getBasePtr();
11906 SDValue VL = MGT->getVectorLength();
11907 ISD::MemIndexType IndexType = MGT->getIndexType();
11908 SDLoc DL(N);
11909
11910 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11911 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11912 return DAG.getGatherVP(
11913 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11914 Ops, MGT->getMemOperand(), IndexType);
11915 }
11916
11917 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11918 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11919 return DAG.getGatherVP(
11920 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11921 Ops, MGT->getMemOperand(), IndexType);
11922 }
11923
11924 return SDValue();
11925}
11926
11927SDValue DAGCombiner::visitMGATHER(SDNode *N) {
11928 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
11929 SDValue Mask = MGT->getMask();
11930 SDValue Chain = MGT->getChain();
11931 SDValue Index = MGT->getIndex();
11932 SDValue Scale = MGT->getScale();
11933 SDValue PassThru = MGT->getPassThru();
11934 SDValue BasePtr = MGT->getBasePtr();
11935 ISD::MemIndexType IndexType = MGT->getIndexType();
11936 SDLoc DL(N);
11937
11938 // Zap gathers with a zero mask.
11939 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11940 return CombineTo(N, PassThru, MGT->getChain());
11941
11942 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11943 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11944 return DAG.getMaskedGather(
11945 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11946 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11947 }
11948
11949 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11950 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11951 return DAG.getMaskedGather(
11952 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11953 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11954 }
11955
11956 return SDValue();
11957}
11958
11959SDValue DAGCombiner::visitMLOAD(SDNode *N) {
11960 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
11961 SDValue Mask = MLD->getMask();
11962 SDLoc DL(N);
11963
11964 // Zap masked loads with a zero mask.
11965 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11966 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
11967
11968 // If this is a masked load with an all-ones mask, we can use an unmasked load.
11969 // FIXME: Can we do this for indexed, expanding, or extending loads?
11970 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
11971 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
11972 SDValue NewLd = DAG.getLoad(
11973 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
11974 MLD->getPointerInfo(), MLD->getOriginalAlign(),
11975 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
11976 return CombineTo(N, NewLd, NewLd.getValue(1));
11977 }
11978
11979 // Try transforming N to an indexed load.
11980 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11981 return SDValue(N, 0);
11982
11983 return SDValue();
11984}
11985
11986SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
11987 auto *SLD = cast<VPStridedLoadSDNode>(N);
11988 EVT EltVT = SLD->getValueType(0).getVectorElementType();
11989 // Combine strided loads with unit-stride to a regular VP load.
11990 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
11991 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11992 SDValue NewLd = DAG.getLoadVP(
11993 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
11994 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
11995 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
11996 SLD->getMemOperand(), SLD->isExpandingLoad());
11997 return CombineTo(N, NewLd, NewLd.getValue(1));
11998 }
11999 return SDValue();
12000}
12001
12002/// A vector select of 2 constant vectors can be simplified to math/logic to
12003/// avoid a variable select instruction and possibly avoid constant loads.
12004SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12005 SDValue Cond = N->getOperand(0);
12006 SDValue N1 = N->getOperand(1);
12007 SDValue N2 = N->getOperand(2);
12008 EVT VT = N->getValueType(0);
12009 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12010 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12011 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12012 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12013 return SDValue();
12014
12015 // Check if we can use the condition value to increment/decrement a single
12016 // constant value. This simplifies a select to an add and removes a constant
12017 // load/materialization from the general case.
12018 bool AllAddOne = true;
12019 bool AllSubOne = true;
12020 unsigned Elts = VT.getVectorNumElements();
12021 for (unsigned i = 0; i != Elts; ++i) {
12022 SDValue N1Elt = N1.getOperand(i);
12023 SDValue N2Elt = N2.getOperand(i);
12024 if (N1Elt.isUndef() || N2Elt.isUndef())
12025 continue;
12026 if (N1Elt.getValueType() != N2Elt.getValueType())
12027 continue;
12028
12029 const APInt &C1 = N1Elt->getAsAPIntVal();
12030 const APInt &C2 = N2Elt->getAsAPIntVal();
12031 if (C1 != C2 + 1)
12032 AllAddOne = false;
12033 if (C1 != C2 - 1)
12034 AllSubOne = false;
12035 }
12036
12037 // Further simplifications for the extra-special cases where the constants are
12038 // all 0 or all -1 should be implemented as folds of these patterns.
12039 SDLoc DL(N);
12040 if (AllAddOne || AllSubOne) {
12041 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12042 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12043 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12044 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12045 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12046 }
12047
12048 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12049 APInt Pow2C;
12050 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12051 isNullOrNullSplat(N2)) {
12052 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12053 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12054 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12055 }
12056
12057 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12058 return V;
12059
12060 // The general case for select-of-constants:
12061 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12062 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12063 // leave that to a machine-specific pass.
12064 return SDValue();
12065}
12066
12067SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12068 SDValue N0 = N->getOperand(0);
12069 SDValue N1 = N->getOperand(1);
12070 SDValue N2 = N->getOperand(2);
12071
12072 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12073 return V;
12074
12075 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12076 return V;
12077
12078 return SDValue();
12079}
12080
12081SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12082 SDValue N0 = N->getOperand(0);
12083 SDValue N1 = N->getOperand(1);
12084 SDValue N2 = N->getOperand(2);
12085 EVT VT = N->getValueType(0);
12086 SDLoc DL(N);
12087
12088 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12089 return V;
12090
12091 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12092 return V;
12093
12094 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12095 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12096 return DAG.getSelect(DL, VT, F, N2, N1);
12097
12098 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12099 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12100 DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12101 N0.getValueType() == N1.getOperand(1).getValueType() &&
12102 TLI.getBooleanContents(N0.getValueType()) ==
12103 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12104 return DAG.getNode(
12105 ISD::ADD, DL, N1.getValueType(), N2,
12106 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12107 }
12108
12109 // Canonicalize integer abs.
12110 // vselect (setg[te] X, 0), X, -X ->
12111 // vselect (setgt X, -1), X, -X ->
12112 // vselect (setl[te] X, 0), -X, X ->
12113 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12114 if (N0.getOpcode() == ISD::SETCC) {
12115 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12116 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12117 bool isAbs = false;
12118 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12119
12120 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12121 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12122 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12123 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12124 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12125 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12126 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12127
12128 if (isAbs) {
12129 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12130 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12131
12132 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12133 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12134 DL, getShiftAmountTy(VT)));
12135 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12136 AddToWorklist(Shift.getNode());
12137 AddToWorklist(Add.getNode());
12138 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12139 }
12140
12141 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12142 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12143 //
12144 // This is OK if we don't care about what happens if either operand is a
12145 // NaN.
12146 //
12147 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12148 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12149 return FMinMax;
12150 }
12151
12152 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12153 return S;
12154 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12155 return S;
12156
12157 // If this select has a condition (setcc) with narrower operands than the
12158 // select, try to widen the compare to match the select width.
12159 // TODO: This should be extended to handle any constant.
12160 // TODO: This could be extended to handle non-loading patterns, but that
12161 // requires thorough testing to avoid regressions.
12162 if (isNullOrNullSplat(RHS)) {
12163 EVT NarrowVT = LHS.getValueType();
12164 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12165 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12166 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12167 unsigned WideWidth = WideVT.getScalarSizeInBits();
12168 bool IsSigned = isSignedIntSetCC(CC);
12169 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12170 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12171 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12172 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12173 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12174 // Both compare operands can be widened for free. The LHS can use an
12175 // extended load, and the RHS is a constant:
12176 // vselect (ext (setcc load(X), C)), N1, N2 -->
12177 // vselect (setcc extload(X), C'), N1, N2
12178 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12179 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12180 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12181 EVT WideSetCCVT = getSetCCResultType(WideVT);
12182 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12183 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12184 }
12185 }
12186
12187 // Match VSELECTs with absolute difference patterns.
12188 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12189 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12190 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12191 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12192 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12193 N1.getOperand(0) == N2.getOperand(1) &&
12194 N1.getOperand(1) == N2.getOperand(0)) {
12195 bool IsSigned = isSignedIntSetCC(CC);
12196 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12197 if (hasOperation(ABDOpc, VT)) {
12198 switch (CC) {
12199 case ISD::SETGT:
12200 case ISD::SETGE:
12201 case ISD::SETUGT:
12202 case ISD::SETUGE:
12203 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12204 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12205 break;
12206 case ISD::SETLT:
12207 case ISD::SETLE:
12208 case ISD::SETULT:
12209 case ISD::SETULE:
12210 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12211 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12212 break;
12213 default:
12214 break;
12215 }
12216 }
12217 }
12218
12219 // Match VSELECTs into add with unsigned saturation.
12220 if (hasOperation(ISD::UADDSAT, VT)) {
12221 // Check if one of the arms of the VSELECT is a vector with all bits set.
12222 // If it's on the left side invert the predicate to simplify logic below.
12223 SDValue Other;
12224 ISD::CondCode SatCC = CC;
12225 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12226 Other = N2;
12227 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12228 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12229 Other = N1;
12230 }
12231
12232 if (Other && Other.getOpcode() == ISD::ADD) {
12233 SDValue CondLHS = LHS, CondRHS = RHS;
12234 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12235
12236 // Canonicalize condition operands.
12237 if (SatCC == ISD::SETUGE) {
12238 std::swap(CondLHS, CondRHS);
12239 SatCC = ISD::SETULE;
12240 }
12241
12242 // We can test against either of the addition operands.
12243 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12244 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12245 if (SatCC == ISD::SETULE && Other == CondRHS &&
12246 (OpLHS == CondLHS || OpRHS == CondLHS))
12247 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12248
12249 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12250 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12251 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12252 CondLHS == OpLHS) {
12253 // If the RHS is a constant we have to reverse the const
12254 // canonicalization.
12255 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12256 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12257 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12258 };
12259 if (SatCC == ISD::SETULE &&
12260 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12261 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12262 }
12263 }
12264 }
12265
12266 // Match VSELECTs into sub with unsigned saturation.
12267 if (hasOperation(ISD::USUBSAT, VT)) {
12268 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12269 // the left side invert the predicate to simplify logic below.
12270 SDValue Other;
12271 ISD::CondCode SatCC = CC;
12272 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12273 Other = N2;
12274 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12275 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12276 Other = N1;
12277 }
12278
12279 // zext(x) >= y ? trunc(zext(x) - y) : 0
12280 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12281 // zext(x) > y ? trunc(zext(x) - y) : 0
12282 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12283 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12284 Other.getOperand(0).getOpcode() == ISD::SUB &&
12285 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12286 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12287 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12288 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12289 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12290 DAG, DL))
12291 return R;
12292 }
12293
12294 if (Other && Other.getNumOperands() == 2) {
12295 SDValue CondRHS = RHS;
12296 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12297
12298 if (OpLHS == LHS) {
12299 // Look for a general sub with unsigned saturation first.
12300 // x >= y ? x-y : 0 --> usubsat x, y
12301 // x > y ? x-y : 0 --> usubsat x, y
12302 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12303 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12304 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12305
12306 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12307 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12308 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12309 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12310 // If the RHS is a constant we have to reverse the const
12311 // canonicalization.
12312 // x > C-1 ? x+-C : 0 --> usubsat x, C
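// e.g. for i8 with C == 16: x > 15 ? x + (-16) : 0 --> usubsat x, 16
// (the compare against C-1 is the canonicalized form of the unsigned x >= C).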
12313 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12314 return (!Op && !Cond) ||
12315 (Op && Cond &&
12316 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12317 };
12318 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12319 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12320 /*AllowUndefs*/ true)) {
12321 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12322 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12323 }
12324
12325 // Another special case: If C was a sign bit, the sub has been
12326 // canonicalized into a xor.
12327 // FIXME: Would it be better to use computeKnownBits to
12328 // determine whether it's safe to decanonicalize the xor?
12329 // x s< 0 ? x^C : 0 --> usubsat x, C
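// (When the sign bit of x is set, subtracting the sign-bit constant C
//  cannot borrow, so x - C == x ^ C; when the sign bit is clear,
//  usubsat x, C saturates to 0, which matches the zero arm.)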
12330 APInt SplatValue;
12331 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12332 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12333 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12334 SplatValue.isSignMask()) {
12335 // Note that we have to rebuild the RHS constant here to
12336 // ensure we don't rely on particular values of undef lanes.
12337 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12338 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12339 }
12340 }
12341 }
12342 }
12343 }
12344 }
12345 }
12346
12347 if (SimplifySelectOps(N, N1, N2))
12348 return SDValue(N, 0); // Don't revisit N.
12349
12350 // Fold (vselect all_ones, N1, N2) -> N1
12351 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12352 return N1;
12353 // Fold (vselect all_zeros, N1, N2) -> N2
12354 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12355 return N2;
12356
12357 // The ConvertSelectToConcatVector function assumes both of the above
12358 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12359 // and addressed.
12360 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12361 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12362 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12363 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12364 return CV;
12365 }
12366
12367 if (SDValue V = foldVSelectOfConstants(N))
12368 return V;
12369
12370 if (hasOperation(ISD::SRA, VT))
12371 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12372 return V;
12373
12374 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12375 return SDValue(N, 0);
12376
12377 return SDValue();
12378}
12379
12380SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12381 SDValue N0 = N->getOperand(0);
12382 SDValue N1 = N->getOperand(1);
12383 SDValue N2 = N->getOperand(2);
12384 SDValue N3 = N->getOperand(3);
12385 SDValue N4 = N->getOperand(4);
12386 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12387
12388 // fold select_cc lhs, rhs, x, x, cc -> x
12389 if (N2 == N3)
12390 return N2;
12391
12392 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12393 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12394 isNullConstant(N1))
12395 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12396
12397 // Determine if the condition we're dealing with is constant
12398 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12399 CC, SDLoc(N), false)) {
12400 AddToWorklist(SCC.getNode());
12401
12402 // cond always true -> true val
12403 // cond always false -> false val
12404 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12405 return SCCC->isZero() ? N3 : N2;
12406
12407 // When the condition is UNDEF, just return the first operand. This is
12408 // coherent with DAG creation; no setcc node is created in this case.
12409 if (SCC->isUndef())
12410 return N2;
12411
12412 // Fold to a simpler select_cc
12413 if (SCC.getOpcode() == ISD::SETCC) {
12414 SDValue SelectOp = DAG.getNode(
12415 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12416 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12417 SelectOp->setFlags(SCC->getFlags());
12418 return SelectOp;
12419 }
12420 }
12421
12422 // If we can fold this based on the true/false value, do so.
12423 if (SimplifySelectOps(N, N2, N3))
12424 return SDValue(N, 0); // Don't revisit N.
12425
12426 // fold select_cc into other things, such as min/max/abs
12427 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12428}
12429
12430SDValue DAGCombiner::visitSETCC(SDNode *N) {
12431 // setcc is very commonly used as an argument to brcond. This pattern
12432 // also lends itself to numerous combines and, as a result, it is desirable
12433 // that we keep the argument to a brcond as a setcc as much as possible.
12434 bool PreferSetCC =
12435 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12436
12437 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12438 EVT VT = N->getValueType(0);
12439 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12440
12441 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12442
12443 if (Combined) {
12444 // If we prefer to have a setcc, and we don't, we'll try our best to
12445 // recreate one using rebuildSetCC.
12446 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12447 SDValue NewSetCC = rebuildSetCC(Combined);
12448
12449 // We don't have anything interesting to combine to.
12450 if (NewSetCC.getNode() == N)
12451 return SDValue();
12452
12453 if (NewSetCC)
12454 return NewSetCC;
12455 }
12456 return Combined;
12457 }
12458
12459 // Optimize
12460 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12461 // or
12462 // 2) (icmp eq/ne X, (rotate X, C1))
12463 // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
12464 // remaining bits (i.e., something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12465 // Then:
12466 // If C1 is a power of 2, then the rotate and shift+and versions are
12467 // equivalent, so we can interchange them depending on target preference.
12468 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12469 // which in turn affects the constant C0. We can use this to get better
12470 // constants again determined by target preference.
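// For example, for a 64-bit x with C1 == 32:
//   (x & 0xffffffff) == (x >> 32) holds exactly when the low and high halves
//   of x are equal, which is exactly when x == (rotl x, 32).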
12471 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12472 auto IsAndWithShift = [](SDValue A, SDValue B) {
12473 return A.getOpcode() == ISD::AND &&
12474 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12475 A.getOperand(0) == B.getOperand(0);
12476 };
12477 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12478 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12479 B.getOperand(0) == A;
12480 };
12481 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12482 bool IsRotate = false;
12483
12484 // Find either shift+and or rotate pattern.
12485 if (IsAndWithShift(N0, N1)) {
12486 AndOrOp = N0;
12487 ShiftOrRotate = N1;
12488 } else if (IsAndWithShift(N1, N0)) {
12489 AndOrOp = N1;
12490 ShiftOrRotate = N0;
12491 } else if (IsRotateWithOp(N0, N1)) {
12492 IsRotate = true;
12493 AndOrOp = N0;
12494 ShiftOrRotate = N1;
12495 } else if (IsRotateWithOp(N1, N0)) {
12496 IsRotate = true;
12497 AndOrOp = N1;
12498 ShiftOrRotate = N0;
12499 }
12500
12501 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12502 (IsRotate || AndOrOp.hasOneUse())) {
12503 EVT OpVT = N0.getValueType();
12504 // Get constant shift/rotate amount and possibly mask (if it's the shift+and
12505 // variant).
12506 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12507 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12508 /*AllowTrunc*/ false);
12509 if (CNode == nullptr)
12510 return std::nullopt;
12511 return CNode->getAPIntValue();
12512 };
12513 std::optional<APInt> AndCMask =
12514 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12515 std::optional<APInt> ShiftCAmt =
12516 GetAPIntValue(ShiftOrRotate.getOperand(1));
12517 unsigned NumBits = OpVT.getScalarSizeInBits();
12518
12519 // We found constants.
12520 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12521 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12522 // Check that the constants meet the constraints.
12523 bool CanTransform = IsRotate;
12524 if (!CanTransform) {
12525 // Check that the mask and shift complement each other
12526 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12527 // Check that we are comparing all bits
12528 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12529 // Check that the and mask is correct for the shift
12530 CanTransform &=
12531 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12532 }
12533
12534 // See if target prefers another shift/rotate opcode.
12535 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12536 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12537 // Transform is valid and we have a new preference.
12538 if (CanTransform && NewShiftOpc != ShiftOpc) {
12539 SDLoc DL(N);
12540 SDValue NewShiftOrRotate =
12541 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12542 ShiftOrRotate.getOperand(1));
12543 SDValue NewAndOrOp = SDValue();
12544
12545 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12546 APInt NewMask =
12547 NewShiftOpc == ISD::SHL
12548 ? APInt::getHighBitsSet(NumBits,
12549 NumBits - ShiftCAmt->getZExtValue())
12550 : APInt::getLowBitsSet(NumBits,
12551 NumBits - ShiftCAmt->getZExtValue());
12552 NewAndOrOp =
12553 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12554 DAG.getConstant(NewMask, DL, OpVT));
12555 } else {
12556 NewAndOrOp = ShiftOrRotate.getOperand(0);
12557 }
12558
12559 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12560 }
12561 }
12562 }
12563 }
12564 return SDValue();
12565}
12566
12567SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12568 SDValue LHS = N->getOperand(0);
12569 SDValue RHS = N->getOperand(1);
12570 SDValue Carry = N->getOperand(2);
12571 SDValue Cond = N->getOperand(3);
12572
12573 // If Carry is false, fold to a regular SETCC.
12574 if (isNullConstant(Carry))
12575 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12576
12577 return SDValue();
12578}
12579
12580/// Check if N satisfies:
12581/// N is used once.
12582/// N is a load.
12583/// The load is compatible with ExtOpcode, meaning:
12584/// If the load has an explicit zero/sign extension, ExtOpcode must have the
12585/// same extension.
12586/// Otherwise, any ExtOpcode is compatible.
12587static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12588 if (!N.hasOneUse())
12589 return false;
12590
12591 if (!isa<LoadSDNode>(N))
12592 return false;
12593
12594 LoadSDNode *Load = cast<LoadSDNode>(N);
12595 ISD::LoadExtType LoadExt = Load->getExtensionType();
12596 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12597 return true;
12598
12599 // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must have the same
12600 // extension.
12601 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12602 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12603 return false;
12604
12605 return true;
12606}
12607
12608/// Fold
12609/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12610/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12611/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12612/// This function is called by the DAGCombiner when visiting sext/zext/aext
12613/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12614 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12615 SelectionDAG &DAG,
12616 CombineLevel Level) {
12617 unsigned Opcode = N->getOpcode();
12618 SDValue N0 = N->getOperand(0);
12619 EVT VT = N->getValueType(0);
12620 SDLoc DL(N);
12621
12622 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12623 Opcode == ISD::ANY_EXTEND) &&
12624 "Expected EXTEND dag node in input!");
12625
12626 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12627 !N0.hasOneUse())
12628 return SDValue();
12629
12630 SDValue Op1 = N0->getOperand(1);
12631 SDValue Op2 = N0->getOperand(2);
12632 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12633 return SDValue();
12634
12635 auto ExtLoadOpcode = ISD::EXTLOAD;
12636 if (Opcode == ISD::SIGN_EXTEND)
12637 ExtLoadOpcode = ISD::SEXTLOAD;
12638 else if (Opcode == ISD::ZERO_EXTEND)
12639 ExtLoadOpcode = ISD::ZEXTLOAD;
12640
12641 // An illegal VSELECT may fail ISel if it appears after legalization (DAG
12642 // Combine2), so we should conservatively check the OperationAction.
12643 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12644 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12645 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12646 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12647 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12648 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12649 return SDValue();
12650
12651 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12652 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12653 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12654}
12655
12656/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12657/// a build_vector of constants.
12658/// This function is called by the DAGCombiner when visiting sext/zext/aext
12659/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12660/// Vector extends are not folded if operations are legal; this is to
12661/// avoid introducing illegal build_vector dag nodes.
12662 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
12663 const TargetLowering &TLI,
12664 SelectionDAG &DAG, bool LegalTypes) {
12665 unsigned Opcode = N->getOpcode();
12666 SDValue N0 = N->getOperand(0);
12667 EVT VT = N->getValueType(0);
12668
12669 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12670 "Expected EXTEND dag node in input!");
12671
12672 // fold (sext c1) -> c1
12673 // fold (zext c1) -> c1
12674 // fold (aext c1) -> c1
12675 if (isa<ConstantSDNode>(N0))
12676 return DAG.getNode(Opcode, DL, VT, N0);
12677
12678 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12679 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12680 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12681 if (N0->getOpcode() == ISD::SELECT) {
12682 SDValue Op1 = N0->getOperand(1);
12683 SDValue Op2 = N0->getOperand(2);
12684 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12685 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12686 // For any_extend, choose sign extension of the constants to allow a
12687 // possible further transform to sign_extend_inreg.i.e.
12688 //
12689 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12690 // t2: i64 = any_extend t1
12691 // -->
12692 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12693 // -->
12694 // t4: i64 = sign_extend_inreg t3
12695 unsigned FoldOpc = Opcode;
12696 if (FoldOpc == ISD::ANY_EXTEND)
12697 FoldOpc = ISD::SIGN_EXTEND;
12698 return DAG.getSelect(DL, VT, N0->getOperand(0),
12699 DAG.getNode(FoldOpc, DL, VT, Op1),
12700 DAG.getNode(FoldOpc, DL, VT, Op2));
12701 }
12702 }
12703
12704 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
12705 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
12706 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
12707 EVT SVT = VT.getScalarType();
12708 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12709 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12710 return SDValue();
12711
12712 // We can fold this node into a build_vector.
12713 unsigned VTBits = SVT.getSizeInBits();
12714 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12715 SmallVector<SDValue, 8> Elts;
12716 unsigned NumElts = VT.getVectorNumElements();
12717
12718 for (unsigned i = 0; i != NumElts; ++i) {
12719 SDValue Op = N0.getOperand(i);
12720 if (Op.isUndef()) {
12721 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12722 Elts.push_back(DAG.getUNDEF(SVT));
12723 else
12724 Elts.push_back(DAG.getConstant(0, DL, SVT));
12725 continue;
12726 }
12727
12728 SDLoc DL(Op);
12729 // Get the constant value and if needed trunc it to the size of the type.
12730 // Nodes like build_vector might have constants wider than the scalar type.
12731 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12732 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12733 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12734 else
12735 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12736 }
12737
12738 return DAG.getBuildVector(VT, DL, Elts);
12739}
12740
12741// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable this:
12742// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12743// transformation. Returns true if the extensions are possible and the
12744// above-mentioned transformation is profitable.
12745 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12746 unsigned ExtOpc,
12747 SmallVectorImpl<SDNode *> &ExtendNodes,
12748 const TargetLowering &TLI) {
12749 bool HasCopyToRegUses = false;
12750 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12751 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12752 ++UI) {
12753 SDNode *User = *UI;
12754 if (User == N)
12755 continue;
12756 if (UI.getUse().getResNo() != N0.getResNo())
12757 continue;
12758 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12759 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12760 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12761 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12762 // Sign bits will be lost after a zext.
12763 return false;
12764 bool Add = false;
12765 for (unsigned i = 0; i != 2; ++i) {
12766 SDValue UseOp = User->getOperand(i);
12767 if (UseOp == N0)
12768 continue;
12769 if (!isa<ConstantSDNode>(UseOp))
12770 return false;
12771 Add = true;
12772 }
12773 if (Add)
12774 ExtendNodes.push_back(User);
12775 continue;
12776 }
12777 // If truncates aren't free and there are users we can't
12778 // extend, it isn't worthwhile.
12779 if (!isTruncFree)
12780 return false;
12781 // Remember if this value is live-out.
12782 if (User->getOpcode() == ISD::CopyToReg)
12783 HasCopyToRegUses = true;
12784 }
12785
12786 if (HasCopyToRegUses) {
12787 bool BothLiveOut = false;
12788 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12789 UI != UE; ++UI) {
12790 SDUse &Use = UI.getUse();
12791 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12792 BothLiveOut = true;
12793 break;
12794 }
12795 }
12796 if (BothLiveOut)
12797 // Both unextended and extended values are live out. There had better be
12798 // a good reason for the transformation.
12799 return !ExtendNodes.empty();
12800 }
12801 return true;
12802}
12803
12804void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12805 SDValue OrigLoad, SDValue ExtLoad,
12806 ISD::NodeType ExtType) {
12807 // Extend SetCC uses if necessary.
12808 SDLoc DL(ExtLoad);
12809 for (SDNode *SetCC : SetCCs) {
12810 SmallVector<SDValue, 4> Ops;
12811
12812 for (unsigned j = 0; j != 2; ++j) {
12813 SDValue SOp = SetCC->getOperand(j);
12814 if (SOp == OrigLoad)
12815 Ops.push_back(ExtLoad);
12816 else
12817 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12818 }
12819
12820 Ops.push_back(SetCC->getOperand(2));
12821 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12822 }
12823}
12824
12825// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12826SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12827 SDValue N0 = N->getOperand(0);
12828 EVT DstVT = N->getValueType(0);
12829 EVT SrcVT = N0.getValueType();
12830
12831 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12832 N->getOpcode() == ISD::ZERO_EXTEND) &&
12833 "Unexpected node type (not an extend)!");
12834
12835 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12836 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12837 // (v8i32 (sext (v8i16 (load x))))
12838 // into:
12839 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12840 // (v4i32 (sextload (x + 16)))))
12841 // Where uses of the original load, i.e.:
12842 // (v8i16 (load x))
12843 // are replaced with:
12844 // (v8i16 (truncate
12845 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12846 // (v4i32 (sextload (x + 16)))))))
12847 //
12848 // This combine is only applicable to illegal, but splittable, vectors.
12849 // All legal types, and illegal non-vector types, are handled elsewhere.
12850 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12851 //
12852 if (N0->getOpcode() != ISD::LOAD)
12853 return SDValue();
12854
12855 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12856
12857 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12858 !N0.hasOneUse() || !LN0->isSimple() ||
12859 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12860 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12861 return SDValue();
12862
12864 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12865 return SDValue();
12866
12867 ISD::LoadExtType ExtType =
12868 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12869
12870 // Try to split the vector types to get down to legal types.
12871 EVT SplitSrcVT = SrcVT;
12872 EVT SplitDstVT = DstVT;
12873 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12874 SplitSrcVT.getVectorNumElements() > 1) {
12875 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12876 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12877 }
12878
12879 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12880 return SDValue();
12881
12882 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12883
12884 SDLoc DL(N);
12885 const unsigned NumSplits =
12886 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12887 const unsigned Stride = SplitSrcVT.getStoreSize();
12888 SmallVector<SDValue, 4> Loads;
12889 SmallVector<SDValue, 4> Chains;
12890
12891 SDValue BasePtr = LN0->getBasePtr();
12892 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12893 const unsigned Offset = Idx * Stride;
12894
12895 SDValue SplitLoad =
12896 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
12897 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
12898 SplitSrcVT, LN0->getOriginalAlign(),
12899 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12900
12901 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12902
12903 Loads.push_back(SplitLoad.getValue(0));
12904 Chains.push_back(SplitLoad.getValue(1));
12905 }
12906
12907 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12908 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12909
12910 // Simplify TF.
12911 AddToWorklist(NewChain.getNode());
12912
12913 CombineTo(N, NewValue);
12914
12915 // Replace uses of the original load (before extension)
12916 // with a truncate of the concatenated sextloaded vectors.
12917 SDValue Trunc =
12918 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12919 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12920 CombineTo(N0.getNode(), Trunc, NewChain);
12921 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12922}
12923
12924// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12925// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12926SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12927 assert(N->getOpcode() == ISD::ZERO_EXTEND);
12928 EVT VT = N->getValueType(0);
12929 EVT OrigVT = N->getOperand(0).getValueType();
12930 if (TLI.isZExtFree(OrigVT, VT))
12931 return SDValue();
12932
12933 // and/or/xor
12934 SDValue N0 = N->getOperand(0);
12935 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12936 N0.getOperand(1).getOpcode() != ISD::Constant ||
12937 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12938 return SDValue();
12939
12940 // shl/shr
12941 SDValue N1 = N0->getOperand(0);
12942 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12943 N1.getOperand(1).getOpcode() != ISD::Constant ||
12944 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12945 return SDValue();
12946
12947 // load
12948 if (!isa<LoadSDNode>(N1.getOperand(0)))
12949 return SDValue();
12950 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
12951 EVT MemVT = Load->getMemoryVT();
12952 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
12953 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
12954 return SDValue();
12955
12956
12957 // If the shift op is SHL, the logic op must be AND, otherwise the result
12958 // will be wrong.
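// (In the narrow type, SHL discards bits shifted above the load width; after
//  widening, those bits survive, and only an AND with the zero-extended
//  constant clears them again. With OR/XOR they would leak into the result.)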
12959 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
12960 return SDValue();
12961
12962 if (!N0.hasOneUse() || !N1.hasOneUse())
12963 return SDValue();
12964
12966 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
12967 ISD::ZERO_EXTEND, SetCCs, TLI))
12968 return SDValue();
12969
12970 // Actually do the transformation.
12971 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
12972 Load->getChain(), Load->getBasePtr(),
12973 Load->getMemoryVT(), Load->getMemOperand());
12974
12975 SDLoc DL1(N1);
12976 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
12977 N1.getOperand(1));
12978
12979 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12980 SDLoc DL0(N0);
12981 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
12982 DAG.getConstant(Mask, DL0, VT));
12983
12984 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12985 CombineTo(N, And);
12986 if (SDValue(Load, 0).hasOneUse()) {
12987 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
12988 } else {
12989 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
12990 Load->getValueType(0), ExtLoad);
12991 CombineTo(Load, Trunc, ExtLoad.getValue(1));
12992 }
12993
12994 // N0 is dead at this point.
12995 recursivelyDeleteUnusedNodes(N0.getNode());
12996
12997 return SDValue(N,0); // Return N so it doesn't get rechecked!
12998}
12999
13000/// If we're narrowing or widening the result of a vector select and the final
13001/// size is the same size as a setcc (compare) feeding the select, then try to
13002/// apply the cast operation to the select's operands because matching vector
13003/// sizes for a select condition and other operands should be more efficient.
13004SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13005 unsigned CastOpcode = Cast->getOpcode();
13006 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13007 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13008 CastOpcode == ISD::FP_ROUND) &&
13009 "Unexpected opcode for vector select narrowing/widening");
13010
13011 // We only do this transform before legal ops because the pattern may be
13012 // obfuscated by target-specific operations after legalization. Do not create
13013 // an illegal select op, however, because that may be difficult to lower.
13014 EVT VT = Cast->getValueType(0);
13015 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13016 return SDValue();
13017
13018 SDValue VSel = Cast->getOperand(0);
13019 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13020 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13021 return SDValue();
13022
13023 // Does the setcc have the same vector size as the casted select?
13024 SDValue SetCC = VSel.getOperand(0);
13025 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13026 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13027 return SDValue();
13028
13029 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13030 SDValue A = VSel.getOperand(1);
13031 SDValue B = VSel.getOperand(2);
13032 SDValue CastA, CastB;
13033 SDLoc DL(Cast);
13034 if (CastOpcode == ISD::FP_ROUND) {
13035 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13036 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13037 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13038 } else {
13039 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13040 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13041 }
13042 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13043}
13044
13045// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13046// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13047 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13048 const TargetLowering &TLI, EVT VT,
13049 bool LegalOperations, SDNode *N,
13050 SDValue N0, ISD::LoadExtType ExtLoadType) {
13051 SDNode *N0Node = N0.getNode();
13052 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13053 : ISD::isZEXTLoad(N0Node);
13054 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13055 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13056 return SDValue();
13057
13058 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13059 EVT MemVT = LN0->getMemoryVT();
13060 if ((LegalOperations || !LN0->isSimple() ||
13061 VT.isVector()) &&
13062 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13063 return SDValue();
13064
13065 SDValue ExtLoad =
13066 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13067 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13068 Combiner.CombineTo(N, ExtLoad);
13069 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13070 if (LN0->use_empty())
13071 Combiner.recursivelyDeleteUnusedNodes(LN0);
13072 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13073}
13074
13075// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13076// Only generate vector extloads when 1) they're legal, and 2) they are
13077// deemed desirable by the target. NonNegZExt can be set to true if a zero
13078// extend has the nonneg flag to allow use of sextload if profitable.
13079 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13080 const TargetLowering &TLI, EVT VT,
13081 bool LegalOperations, SDNode *N, SDValue N0,
13082 ISD::LoadExtType ExtLoadType,
13083 ISD::NodeType ExtOpc,
13084 bool NonNegZExt = false) {
13085 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13086 return {};
13087
13088 // If this is zext nneg, see if it would make sense to treat it as a sext.
13089 if (NonNegZExt) {
13090 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13091 "Unexpected load type or opcode");
13092 for (SDNode *User : N0->uses()) {
13093 if (User->getOpcode() == ISD::SETCC) {
13094 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13095 if (ISD::isSignedIntSetCC(CC)) {
13096 ExtLoadType = ISD::SEXTLOAD;
13097 ExtOpc = ISD::SIGN_EXTEND;
13098 break;
13099 }
13100 }
13101 }
13102 }
13103
13104 // TODO: isFixedLengthVector() should be removed, with any negative effects
13105 // on code generation being the result of that target's implementation of
13106 // isVectorLoadExtDesirable().
13107 if ((LegalOperations || VT.isFixedLengthVector() ||
13108 !cast<LoadSDNode>(N0)->isSimple()) &&
13109 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13110 return {};
13111
13112 bool DoXform = true;
13113 SmallVector<SDNode *, 4> SetCCs;
13114 if (!N0.hasOneUse())
13115 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13116 if (VT.isVector())
13117 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13118 if (!DoXform)
13119 return {};
13120
13121 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13122 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13123 LN0->getBasePtr(), N0.getValueType(),
13124 LN0->getMemOperand());
13125 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13126 // If the load value is used only by N, replace it via CombineTo N.
13127 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13128 Combiner.CombineTo(N, ExtLoad);
13129 if (NoReplaceTrunc) {
13130 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13131 Combiner.recursivelyDeleteUnusedNodes(LN0);
13132 } else {
13133 SDValue Trunc =
13134 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13135 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13136 }
13137 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13138}
13139
13140static SDValue
13141 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13142 bool LegalOperations, SDNode *N, SDValue N0,
13143 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13144 if (!N0.hasOneUse())
13145 return SDValue();
13146
13147 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13148 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13149 return SDValue();
13150
13151 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13152 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13153 return SDValue();
13154
13155 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13156 return SDValue();
13157
13158 SDLoc dl(Ld);
13159 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13160 SDValue NewLoad = DAG.getMaskedLoad(
13161 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13162 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13163 ExtLoadType, Ld->isExpandingLoad());
13164 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13165 return NewLoad;
13166}
13167
13168// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13169 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13170 const TargetLowering &TLI, EVT VT,
13171 SDValue N0,
13172 ISD::LoadExtType ExtLoadType) {
13173 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13174 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13175 return {};
13176 EVT MemoryVT = ALoad->getMemoryVT();
13177 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13178 return {};
13179 // Can't fold into ALoad if it is already extending differently.
13180 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13181 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13182 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13183 return {};
13184
13185 EVT OrigVT = ALoad->getValueType(0);
13186 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13187 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13188 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13189 ALoad->getBasePtr(), ALoad->getMemOperand()));
13190 NewALoad->setExtensionType(ExtLoadType);
13191 DAG.ReplaceAllUsesOfValueWith(
13192 SDValue(ALoad, 0),
13193 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13194 // Update the chain uses.
13195 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13196 return SDValue(NewALoad, 0);
13197}
13198
13199 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13200 bool LegalOperations) {
13201 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13202 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13203
13204 SDValue SetCC = N->getOperand(0);
13205 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13206 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13207 return SDValue();
13208
13209 SDValue X = SetCC.getOperand(0);
13210 SDValue Ones = SetCC.getOperand(1);
13211 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13212 EVT VT = N->getValueType(0);
13213 EVT XVT = X.getValueType();
13214 // setge X, C is canonicalized to setgt, so we do not need to match that
13215 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13216 // not require the 'not' op.
13217 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13218 // Invert and smear/shift the sign bit:
13219 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13220 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
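// e.g. for i32: X = 5  -> not X = 0xfffffffa, sra 31 = -1 (setgt 5, -1 is true)
//               X = -7 -> not X = 6,          sra 31 =  0 (setgt -7, -1 is false)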
13221 SDLoc DL(N);
13222 unsigned ShCt = VT.getSizeInBits() - 1;
13223 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13224 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13225 SDValue NotX = DAG.getNOT(DL, X, VT);
13226 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13227 auto ShiftOpcode =
13228 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13229 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13230 }
13231 }
13232 return SDValue();
13233}
13234
13235SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13236 SDValue N0 = N->getOperand(0);
13237 if (N0.getOpcode() != ISD::SETCC)
13238 return SDValue();
13239
13240 SDValue N00 = N0.getOperand(0);
13241 SDValue N01 = N0.getOperand(1);
13242 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13243 EVT VT = N->getValueType(0);
13244 EVT N00VT = N00.getValueType();
13245 SDLoc DL(N);
13246
13247 // Propagate fast-math-flags.
13248 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13249
13250 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13251 // the same size as the compared operands. Try to optimize sext(setcc())
13252 // if this is the case.
13253 if (VT.isVector() && !LegalOperations &&
13254 TLI.getBooleanContents(N00VT) ==
13255 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13256 EVT SVT = getSetCCResultType(N00VT);
13257
13258 // If we already have the desired type, don't change it.
13259 if (SVT != N0.getValueType()) {
13260 // We know that the # elements of the results is the same as the
13261 // # elements of the compare (and the # elements of the compare result
13262 // for that matter). Check to see that they are the same size. If so,
13263 // we know that the element size of the sext'd result matches the
13264 // element size of the compare operands.
13265 if (VT.getSizeInBits() == SVT.getSizeInBits())
13266 return DAG.getSetCC(DL, VT, N00, N01, CC);
13267
13268 // If the desired elements are smaller or larger than the source
13269 // elements, we can use a matching integer vector type and then
13270 // truncate/sign extend.
13271 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13272 if (SVT == MatchingVecType) {
13273 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13274 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13275 }
13276 }
13277
13278 // Try to eliminate the sext of a setcc by zexting the compare operands.
13279 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13280 !TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {
13281 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13282 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13283 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13284
13285 // We have an unsupported narrow vector compare op that would be legal
13286 // if extended to the destination type. See if the compare operands
13287 // can be freely extended to the destination type.
13288 auto IsFreeToExtend = [&](SDValue V) {
13289 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13290 return true;
13291 // Match a simple, non-extended load that can be converted to a
13292 // legal {z/s}ext-load.
13293 // TODO: Allow widening of an existing {z/s}ext-load?
13294 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13295 ISD::isUNINDEXEDLoad(V.getNode()) &&
13296 cast<LoadSDNode>(V)->isSimple() &&
13297 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13298 return false;
13299
13300 // Non-chain users of this value must either be the setcc in this
13301 // sequence or extends that can be folded into the new {z/s}ext-load.
13302 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13303 UI != UE; ++UI) {
13304 // Skip uses of the chain and the setcc.
13305 SDNode *User = *UI;
13306 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13307 continue;
13308 // Extra users must have exactly the same cast we are about to create.
13309 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13310 // is enhanced similarly.
13311 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13312 return false;
13313 }
13314 return true;
13315 };
13316
13317 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13318 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13319 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13320 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13321 }
13322 }
13323 }
13324
13325 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13326 // Here, T can be 1 or -1, depending on the type of the setcc and
13327 // getBooleanContents().
13328 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13329
13330 // To determine the "true" side of the select, we need to know the high bit
13331 // of the value returned by the setcc if it evaluates to true.
13332 // If the type of the setcc is i1, then the true case of the select is just
13333 // sext(i1 1), that is, -1.
13334 // If the type of the setcc is larger (say, i8) then the value of the high
13335 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13336 // of the appropriate width.
13337 SDValue ExtTrueVal = (SetCCWidth == 1)
13338 ? DAG.getAllOnesConstant(DL, VT)
13339 : DAG.getBoolConstant(true, DL, VT, N00VT);
13340 SDValue Zero = DAG.getConstant(0, DL, VT);
13341 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13342 return SCC;
13343
13344 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13345 EVT SetCCVT = getSetCCResultType(N00VT);
13346 // Don't do this transform for i1 because there's a select transform
13347 // that would reverse it.
13348 // TODO: We should not do this transform at all without a target hook
13349 // because a sext is likely cheaper than a select?
13350 if (SetCCVT.getScalarSizeInBits() != 1 &&
13351 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13352 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13353 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13354 }
13355 }
13356
13357 return SDValue();
13358}
13359
13360SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13361 SDValue N0 = N->getOperand(0);
13362 EVT VT = N->getValueType(0);
13363 SDLoc DL(N);
13364
13365 if (VT.isVector())
13366 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13367 return FoldedVOp;
13368
13369 // sext(undef) = 0 because the top bit will all be the same.
13370 if (N0.isUndef())
13371 return DAG.getConstant(0, DL, VT);
13372
13373 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13374 return Res;
13375
13376 // fold (sext (sext x)) -> (sext x)
13377 // fold (sext (aext x)) -> (sext x)
13378 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13379 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13380
13381 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13382 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13383 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13384 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13385 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13386 N0.getOperand(0));
13387
13388 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13389 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13390 SDValue N00 = N0.getOperand(0);
13391 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13392 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13393 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13394 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13395 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13396 }
13397 }
13398
13399 if (N0.getOpcode() == ISD::TRUNCATE) {
13400 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13401 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13402 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13403 SDNode *oye = N0.getOperand(0).getNode();
13404 if (NarrowLoad.getNode() != N0.getNode()) {
13405 CombineTo(N0.getNode(), NarrowLoad);
13406 // CombineTo deleted the truncate, if needed, but not what's under it.
13407 AddToWorklist(oye);
13408 }
13409 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13410 }
13411
13412 // See if the value being truncated is already sign extended. If so, just
13413 // eliminate the trunc/sext pair.
13414 SDValue Op = N0.getOperand(0);
13415 unsigned OpBits = Op.getScalarValueSizeInBits();
13416 unsigned MidBits = N0.getScalarValueSizeInBits();
13417 unsigned DestBits = VT.getScalarSizeInBits();
13418 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13419
13420 if (OpBits == DestBits) {
13421 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13422 // bits, it is already ready.
13423 if (NumSignBits > DestBits-MidBits)
13424 return Op;
13425 } else if (OpBits < DestBits) {
13426 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13427 // bits, just sext from i32.
13428 if (NumSignBits > OpBits-MidBits)
13429 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13430 } else {
13431 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13432 // bits, just truncate to i32.
13433 if (NumSignBits > OpBits-MidBits)
13434 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13435 }
13436
13437 // fold (sext (truncate x)) -> (sextinreg x).
13438 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13439 N0.getValueType())) {
13440 if (OpBits < DestBits)
13441 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13442 else if (OpBits > DestBits)
13443 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13444 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13445 DAG.getValueType(N0.getValueType()));
13446 }
13447 }
13448
13449 // Try to simplify (sext (load x)).
13450 if (SDValue foldedExt =
13451 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13452 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13453 return foldedExt;
13454
13455 if (SDValue foldedExt =
13456 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13457 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13458 return foldedExt;
13459
13460 // fold (sext (load x)) to multiple smaller sextloads.
13461 // Only on illegal but splittable vectors.
13462 if (SDValue ExtLoad = CombineExtLoad(N))
13463 return ExtLoad;
13464
13465 // Try to simplify (sext (sextload x)).
13466 if (SDValue foldedExt = tryToFoldExtOfExtload(
13467 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13468 return foldedExt;
13469
13470 // Try to simplify (sext (atomic_load x)).
13471 if (SDValue foldedExt =
13472 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13473 return foldedExt;
13474
13475 // fold (sext (and/or/xor (load x), cst)) ->
13476 // (and/or/xor (sextload x), (sext cst))
13477 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13478 isa<LoadSDNode>(N0.getOperand(0)) &&
13479 N0.getOperand(1).getOpcode() == ISD::Constant &&
13480 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13481 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13482 EVT MemVT = LN00->getMemoryVT();
13483 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13484 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13485 SmallVector<SDNode *, 4> SetCCs;
13486 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13487 ISD::SIGN_EXTEND, SetCCs, TLI);
13488 if (DoXform) {
13489 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13490 LN00->getChain(), LN00->getBasePtr(),
13491 LN00->getMemoryVT(),
13492 LN00->getMemOperand());
13493 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13494 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13495 ExtLoad, DAG.getConstant(Mask, DL, VT));
13496 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13497 bool NoReplaceTruncAnd = !N0.hasOneUse();
13498 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13499 CombineTo(N, And);
13500 // If N0 has multiple uses, change other uses as well.
13501 if (NoReplaceTruncAnd) {
13502 SDValue TruncAnd =
13503 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
13504 CombineTo(N0.getNode(), TruncAnd);
13505 }
13506 if (NoReplaceTrunc) {
13507 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13508 } else {
13509 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13510 LN00->getValueType(0), ExtLoad);
13511 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13512 }
13513 return SDValue(N,0); // Return N so it doesn't get rechecked!
13514 }
13515 }
13516 }
13517
13518 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13519 return V;
13520
13521 if (SDValue V = foldSextSetcc(N))
13522 return V;
13523
13524 // fold (sext x) -> (zext x) if the sign bit is known zero.
13525 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13526 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13527 DAG.SignBitIsZero(N0)) {
13528 SDNodeFlags Flags;
13529 Flags.setNonNeg(true);
13530 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13531 }
13532
13533 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13534 return NewVSel;
13535
13536 // Eliminate this sign extend by doing a negation in the destination type:
13537 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13538 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13539 isNullOrNullSplat(N0.getOperand(0)) &&
13540 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
13541 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
13542 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13543 return DAG.getNegative(Zext, DL, VT);
13544 }
13545 // Eliminate this sign extend by doing a decrement in the destination type:
13546 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13547 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13548 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13549 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
13550 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
13551 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13552 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13553 }
13554
13555 // fold sext (not i1 X) -> add (zext i1 X), -1
13556 // TODO: This could be extended to handle bool vectors.
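// (For an i1 value X: sext (not X) is -1 when X == 0 and 0 when X == 1,
//  which is exactly (zext X) + (-1).)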
13557 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13558 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13559 TLI.isOperationLegal(ISD::ADD, VT)))) {
13560 // If we can eliminate the 'not', the sext form should be better
13561 if (SDValue NewXor = visitXOR(N0.getNode())) {
13562 // Returning N0 is a form of in-visit replacement that may have
13563 // invalidated N0.
13564 if (NewXor.getNode() == N0.getNode()) {
13565 // Return SDValue here as the xor should have already been replaced in
13566 // this sext.
13567 return SDValue();
13568 }
13569
13570 // Return a new sext with the new xor.
13571 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13572 }
13573
13574 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13575 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13576 }
13577
13578 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13579 return Res;
13580
13581 return SDValue();
13582}
13583
13584/// Given an extending node with a pop-count operand, if the target does not
13585/// support a pop-count in the narrow source type but does support it in the
13586/// destination type, widen the pop-count to the destination type.
13587static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13588 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13589 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13590
13591 SDValue CtPop = Extend->getOperand(0);
13592 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13593 return SDValue();
13594
13595 EVT VT = Extend->getValueType(0);
13596 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13597 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13598 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13599 return SDValue();
13600
13601 // zext (ctpop X) --> ctpop (zext X)
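// (Zero-extending the input only adds zero bits, so the population count of
//  the widened value is unchanged.)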
13602 SDLoc DL(Extend);
13603 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13604 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13605}
13606
13607 // If we have (zext (abs X)) where X has a type that will be promoted by type
13608// legalization, convert to (abs (sext X)). But don't extend past a legal type.
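// (This is safe: for an iN value X, (zext (abs X)) and (abs (sext X)) agree
//  for every input, including the INT_MIN bit pattern, where both give 2^(N-1).)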
13609static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13610 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13611
13612 EVT VT = Extend->getValueType(0);
13613 if (VT.isVector())
13614 return SDValue();
13615
13616 SDValue Abs = Extend->getOperand(0);
13617 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13618 return SDValue();
13619
13620 EVT AbsVT = Abs.getValueType();
13621 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13622 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13623 TargetLowering::TypePromoteInteger)
13624 return SDValue();
13625
13626 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13627
13628 SDValue SExt =
13629 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13630 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13631 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13632}
13633
13634SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13635 SDValue N0 = N->getOperand(0);
13636 EVT VT = N->getValueType(0);
13637 SDLoc DL(N);
13638
13639 if (VT.isVector())
13640 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13641 return FoldedVOp;
13642
13643 // zext(undef) = 0
13644 if (N0.isUndef())
13645 return DAG.getConstant(0, DL, VT);
13646
13647 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13648 return Res;
13649
13650 // fold (zext (zext x)) -> (zext x)
13651 // fold (zext (aext x)) -> (zext x)
13652 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13653 SDNodeFlags Flags;
13654 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13655 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13656 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13657 }
13658
13659 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13660 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13661 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13662 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13663 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT,
13664 N0.getOperand(0));
13665
13666 // fold (zext (truncate x)) -> (zext x) or
13667 // (zext (truncate x)) -> (truncate x)
13668 // This is valid when the truncated bits of x are already zero.
13669 SDValue Op;
13670 KnownBits Known;
13671 if (isTruncateOf(DAG, N0, Op, Known)) {
13672 APInt TruncatedBits =
13673 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13674 APInt(Op.getScalarValueSizeInBits(), 0) :
13675 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13676 N0.getScalarValueSizeInBits(),
13677 std::min(Op.getScalarValueSizeInBits(),
13678 VT.getScalarSizeInBits()));
13679 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13680 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13681 DAG.salvageDebugInfo(*N0.getNode());
13682
13683 return ZExtOrTrunc;
13684 }
13685 }
13686
13687 // fold (zext (truncate x)) -> (and x, mask)
13688 if (N0.getOpcode() == ISD::TRUNCATE) {
13689 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13690 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13691 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13692 SDNode *oye = N0.getOperand(0).getNode();
13693 if (NarrowLoad.getNode() != N0.getNode()) {
13694 CombineTo(N0.getNode(), NarrowLoad);
13695 // CombineTo deleted the truncate, if needed, but not what's under it.
13696 AddToWorklist(oye);
13697 }
13698 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13699 }
13700
13701 EVT SrcVT = N0.getOperand(0).getValueType();
13702 EVT MinVT = N0.getValueType();
13703
13704 if (N->getFlags().hasNonNeg()) {
13705 SDValue Op = N0.getOperand(0);
13706 unsigned OpBits = SrcVT.getScalarSizeInBits();
13707 unsigned MidBits = MinVT.getScalarSizeInBits();
13708 unsigned DestBits = VT.getScalarSizeInBits();
13709 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13710
13711 if (OpBits == DestBits) {
13712 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13713 // bits, it is already ready.
13714 if (NumSignBits > DestBits - MidBits)
13715 return Op;
13716 } else if (OpBits < DestBits) {
13717 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13718 // bits, just sext from i32.
13719 // FIXME: This can probably be ZERO_EXTEND nneg?
13720 if (NumSignBits > OpBits - MidBits)
13721 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13722 } else {
13723 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13724 // bits, just truncate to i32.
13725 if (NumSignBits > OpBits - MidBits)
13726 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13727 }
13728 }
13729
13730 // Try to mask before the extension to avoid having to generate a larger mask,
13731 // possibly over several sub-vectors.
13732 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13733 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13734 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13735 SDValue Op = N0.getOperand(0);
13736 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13737 AddToWorklist(Op.getNode());
13738 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13739 // Transfer the debug info; the new node is equivalent to N0.
13740 DAG.transferDbgValues(N0, ZExtOrTrunc);
13741 return ZExtOrTrunc;
13742 }
13743 }
13744
13745 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13746 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13747 AddToWorklist(Op.getNode());
13748 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13749 // We may safely transfer the debug info describing the truncate node over
13750 // to the equivalent and operation.
13751 DAG.transferDbgValues(N0, And);
13752 return And;
13753 }
13754 }
13755
13756 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13757 // if either of the casts is not free.
13758 if (N0.getOpcode() == ISD::AND &&
13759 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13760 N0.getOperand(1).getOpcode() == ISD::Constant &&
13761 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13762 !TLI.isZExtFree(N0.getValueType(), VT))) {
13763 SDValue X = N0.getOperand(0).getOperand(0);
13764 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13765 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13766 return DAG.getNode(ISD::AND, DL, VT,
13767 X, DAG.getConstant(Mask, DL, VT));
13768 }
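// Illustration (hypothetical example): (zext (and (trunc X:i64 to i32), 15)
// to i64) becomes (and X, 15), saving one of the two casts whenever the
// trunc or the zext is not free on the target.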
13769
13770 // Try to simplify (zext (load x)).
13771 if (SDValue foldedExt = tryToFoldExtOfLoad(
13772 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13773 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13774 return foldedExt;
13775
13776 if (SDValue foldedExt =
13777 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13778 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13779 return foldedExt;
13780
13781 // fold (zext (load x)) to multiple smaller zextloads.
13782 // Only on illegal but splittable vectors.
13783 if (SDValue ExtLoad = CombineExtLoad(N))
13784 return ExtLoad;
13785
13786 // Try to simplify (zext (atomic_load x)).
13787 if (SDValue foldedExt =
13788 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13789 return foldedExt;
13790
13791 // fold (zext (and/or/xor (load x), cst)) ->
13792 // (and/or/xor (zextload x), (zext cst))
13793 // Unless (and (load x) cst) will match as a zextload already and has
13794 // additional users, or the zext is already free.
13795 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13796 isa<LoadSDNode>(N0.getOperand(0)) &&
13797 N0.getOperand(1).getOpcode() == ISD::Constant &&
13798 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13799 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13800 EVT MemVT = LN00->getMemoryVT();
13801 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13802 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13803 bool DoXform = true;
13804 SmallVector<SDNode *, 4> SetCCs;
13805 if (!N0.hasOneUse()) {
13806 if (N0.getOpcode() == ISD::AND) {
13807 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13808 EVT LoadResultTy = AndC->getValueType(0);
13809 EVT ExtVT;
13810 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13811 DoXform = false;
13812 }
13813 }
13814 if (DoXform)
13815 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13816 ISD::ZERO_EXTEND, SetCCs, TLI);
13817 if (DoXform) {
13818 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13819 LN00->getChain(), LN00->getBasePtr(),
13820 LN00->getMemoryVT(),
13821 LN00->getMemOperand());
13822 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13823 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13824 ExtLoad, DAG.getConstant(Mask, DL, VT));
13825 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13826 bool NoReplaceTruncAnd = !N0.hasOneUse();
13827 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13828 CombineTo(N, And);
13829 // If N0 has multiple uses, change other uses as well.
13830 if (NoReplaceTruncAnd) {
13831 SDValue TruncAnd =
13832 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13833 CombineTo(N0.getNode(), TruncAnd);
13834 }
13835 if (NoReplaceTrunc) {
13836 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13837 } else {
13838 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13839 LN00->getValueType(0), ExtLoad);
13840 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13841 }
13842 return SDValue(N,0); // Return N so it doesn't get rechecked!
13843 }
13844 }
13845 }
13846
13847 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13848 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13849 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13850 return ZExtLoad;
13851
13852 // Try to simplify (zext (zextload x)).
13853 if (SDValue foldedExt = tryToFoldExtOfExtload(
13854 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13855 return foldedExt;
13856
13857 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13858 return V;
13859
13860 if (N0.getOpcode() == ISD::SETCC) {
13861 // Propagate fast-math-flags.
13862 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13863
13864 // Only do this before legalize for now.
13865 if (!LegalOperations && VT.isVector() &&
13866 N0.getValueType().getVectorElementType() == MVT::i1) {
13867 EVT N00VT = N0.getOperand(0).getValueType();
13868 if (getSetCCResultType(N00VT) == N0.getValueType())
13869 return SDValue();
13870
13871 // We know that the # elements of the result is the same as the #
13872 // elements of the compare (and the # elements of the compare result for
13873 // that matter). Check to see that they are the same size. If so, we know
13874 // that the element size of the sext'd result matches the element size of
13875 // the compare operands.
13876 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13877 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13878 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13879 N0.getOperand(1), N0.getOperand(2));
13880 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13881 }
13882
13883 // If the desired elements are smaller or larger than the source
13884 // elements we can use a matching integer vector type and then
13885 // truncate/any extend followed by zext_in_reg.
13886 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13887 SDValue VsetCC =
13888 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13889 N0.getOperand(1), N0.getOperand(2));
13890 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13891 N0.getValueType());
13892 }
13893
13894 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13895 EVT N0VT = N0.getValueType();
13896 EVT N00VT = N0.getOperand(0).getValueType();
13897 if (SDValue SCC = SimplifySelectCC(
13898 DL, N0.getOperand(0), N0.getOperand(1),
13899 DAG.getBoolConstant(true, DL, N0VT, N00VT),
13900 DAG.getBoolConstant(false, DL, N0VT, N00VT),
13901 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13902 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13903 }
13904
13905 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
13906 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13907 !TLI.isZExtFree(N0, VT)) {
13908 SDValue ShVal = N0.getOperand(0);
13909 SDValue ShAmt = N0.getOperand(1);
13910 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13911 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13912 if (N0.getOpcode() == ISD::SHL) {
13913 // If the original shl may be shifting out bits, do not perform this
13914 // transformation.
13915 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13916 ShVal.getOperand(0).getValueSizeInBits();
13917 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13918 // If the shift is too large, then see if we can deduce that the
13919 // shift is safe anyway.
13920 // Create a mask that has ones for the bits being shifted out.
13921 APInt ShiftOutMask =
13922 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
13923 ShAmtC->getAPIntValue().getZExtValue());
13924
13925 // Check if the bits being shifted out are known to be zero.
13926 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
13927 return SDValue();
13928 }
13929 }
13930
13931 // Ensure that the shift amount is wide enough for the shifted value.
13932 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13933 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13934
13935 return DAG.getNode(N0.getOpcode(), DL, VT,
13936 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13937 }
13938 }
13939 }
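// Illustration (hypothetical example): for (zext (shl (zext x:i8 to i16), 10)
// to i32), only 16 - 8 = 8 high bits of the shifted value are known zero, so
// a shift by 10 may discard defined bits; the fold above only proceeds when
// the bits that would be shifted out are proven zero.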
13940
13941 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13942 return NewVSel;
13943
13944 if (SDValue NewCtPop = widenCtPop(N, DAG))
13945 return NewCtPop;
13946
13947 if (SDValue V = widenAbs(N, DAG))
13948 return V;
13949
13950 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13951 return Res;
13952
13953 // CSE zext nneg with sext if the zext is not free.
13954 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
13955 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
13956 if (CSENode)
13957 return SDValue(CSENode, 0);
13958 }
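// Illustration (hypothetical example): if the DAG already contains
// (sext X to i64), a newly visited (zext nneg X to i64) computes the same
// value, so returning the existing sign-extend node lets CSE drop the zext
// when zero-extension is not free.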
13959
13960 return SDValue();
13961}
13962
13963SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13964 SDValue N0 = N->getOperand(0);
13965 EVT VT = N->getValueType(0);
13966 SDLoc DL(N);
13967
13968 // aext(undef) = undef
13969 if (N0.isUndef())
13970 return DAG.getUNDEF(VT);
13971
13972 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13973 return Res;
13974
13975 // fold (aext (aext x)) -> (aext x)
13976 // fold (aext (zext x)) -> (zext x)
13977 // fold (aext (sext x)) -> (sext x)
13978 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
13979 N0.getOpcode() == ISD::SIGN_EXTEND) {
13980 SDNodeFlags Flags;
13981 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13982 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13983 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
13984 }
13985
13986 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
13987 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13988 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13989 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13990 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13991 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13992 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
13993
13994 // fold (aext (truncate (load x))) -> (aext (smaller load x))
13995 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
13996 if (N0.getOpcode() == ISD::TRUNCATE) {
13997 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13998 SDNode *oye = N0.getOperand(0).getNode();
13999 if (NarrowLoad.getNode() != N0.getNode()) {
14000 CombineTo(N0.getNode(), NarrowLoad);
14001 // CombineTo deleted the truncate, if needed, but not what's under it.
14002 AddToWorklist(oye);
14003 }
14004 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14005 }
14006 }
14007
14008 // fold (aext (truncate x))
14009 if (N0.getOpcode() == ISD::TRUNCATE)
14010 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14011
14012 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14013 // if the trunc is not free.
14014 if (N0.getOpcode() == ISD::AND &&
14015 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14016 N0.getOperand(1).getOpcode() == ISD::Constant &&
14017 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14018 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14019 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14020 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14021 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14022 }
14023
14024 // fold (aext (load x)) -> (aext (truncate (extload x)))
14025 // None of the supported targets knows how to perform load and any_ext
14026 // on vectors in one instruction, so attempt to fold to zext instead.
14027 if (VT.isVector()) {
14028 // Try to simplify (zext (load x)).
14029 if (SDValue foldedExt =
14030 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14031 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14032 return foldedExt;
14033 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14034 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14035 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14036 bool DoXform = true;
14037 SmallVector<SDNode *, 4> SetCCs;
14038 if (!N0.hasOneUse())
14039 DoXform =
14040 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14041 if (DoXform) {
14042 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14043 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14044 LN0->getBasePtr(), N0.getValueType(),
14045 LN0->getMemOperand());
14046 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14047 // If the load value is used only by N, replace it via CombineTo N.
14048 bool NoReplaceTrunc = N0.hasOneUse();
14049 CombineTo(N, ExtLoad);
14050 if (NoReplaceTrunc) {
14051 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14052 recursivelyDeleteUnusedNodes(LN0);
14053 } else {
14054 SDValue Trunc =
14055 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14056 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14057 }
14058 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14059 }
14060 }
14061
14062 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14063 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14064 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14065 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14066 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14067 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14068 ISD::LoadExtType ExtType = LN0->getExtensionType();
14069 EVT MemVT = LN0->getMemoryVT();
14070 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14071 SDValue ExtLoad =
14072 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14073 MemVT, LN0->getMemOperand());
14074 CombineTo(N, ExtLoad);
14075 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14076 recursivelyDeleteUnusedNodes(LN0);
14077 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14078 }
14079 }
14080
14081 if (N0.getOpcode() == ISD::SETCC) {
14082 // Propagate fast-math-flags.
14083 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14084
14085 // For vectors:
14086 // aext(setcc) -> vsetcc
14087 // aext(setcc) -> truncate(vsetcc)
14088 // aext(setcc) -> aext(vsetcc)
14089 // Only do this before legalize for now.
14090 if (VT.isVector() && !LegalOperations) {
14091 EVT N00VT = N0.getOperand(0).getValueType();
14092 if (getSetCCResultType(N00VT) == N0.getValueType())
14093 return SDValue();
14094
14095 // We know that the # elements of the result is the same as the
14096 // # elements of the compare (and the # elements of the compare result
14097 // for that matter). Check to see that they are the same size. If so,
14098 // we know that the element size of the sext'd result matches the
14099 // element size of the compare operands.
14100 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14101 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14102 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14103
14104 // If the desired elements are smaller or larger than the source
14105 // elements we can use a matching integer vector type and then
14106 // truncate/any extend
14107 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14108 SDValue VsetCC = DAG.getSetCC(
14109 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14110 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14111 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14112 }
14113
14114 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14115 if (SDValue SCC = SimplifySelectCC(
14116 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14117 DAG.getConstant(0, DL, VT),
14118 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14119 return SCC;
14120 }
14121
14122 if (SDValue NewCtPop = widenCtPop(N, DAG))
14123 return NewCtPop;
14124
14125 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14126 return Res;
14127
14128 return SDValue();
14129}
14130
14131SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14132 unsigned Opcode = N->getOpcode();
14133 SDValue N0 = N->getOperand(0);
14134 SDValue N1 = N->getOperand(1);
14135 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14136
14137 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14138 if (N0.getOpcode() == Opcode &&
14139 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14140 return N0;
14141
14142 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14143 N0.getOperand(0).getOpcode() == Opcode) {
14144 // We have an assert, truncate, assert sandwich. Make one stronger assert
14145 // by asserting on the smallest asserted type to the larger source type.
14146 // This eliminates the later assert:
14147 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14148 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14149 SDLoc DL(N);
14150 SDValue BigA = N0.getOperand(0);
14151 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14152 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14153 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14154 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14155 BigA.getOperand(0), MinAssertVTVal);
14156 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14157 }
14158
14159 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14160 // than X, just move the AssertZext in front of the truncate and drop the
14161 // AssertSExt.
14162 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14163 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14164 Opcode == ISD::AssertZext) {
14165 SDValue BigA = N0.getOperand(0);
14166 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14167 if (AssertVT.bitsLT(BigA_AssertVT)) {
14168 SDLoc DL(N);
14169 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14170 BigA.getOperand(0), N1);
14171 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14172 }
14173 }
14174
14175 return SDValue();
14176}
14177
14178SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14179 SDLoc DL(N);
14180
14181 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14182 SDValue N0 = N->getOperand(0);
14183
14184 // Fold (assertalign (assertalign x, AL0), AL1) ->
14185 // (assertalign x, max(AL0, AL1))
14186 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14187 return DAG.getAssertAlign(DL, N0.getOperand(0),
14188 std::max(AL, AAN->getAlign()));
14189
14190 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14191 // this assert down to source operands so that those arithmetic ops could be
14192 // exposed to the DAG combining.
14193 switch (N0.getOpcode()) {
14194 default:
14195 break;
14196 case ISD::ADD:
14197 case ISD::SUB: {
14198 unsigned AlignShift = Log2(AL);
14199 SDValue LHS = N0.getOperand(0);
14200 SDValue RHS = N0.getOperand(1);
14201 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14202 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14203 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14204 if (LHSAlignShift < AlignShift)
14205 LHS = DAG.getAssertAlign(DL, LHS, AL);
14206 if (RHSAlignShift < AlignShift)
14207 RHS = DAG.getAssertAlign(DL, RHS, AL);
14208 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14209 }
14210 break;
14211 }
14212 }
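// Illustration (hypothetical example): for (assertalign (add P, 32), Align(16)),
// the constant 32 already has log2(16) = 4 trailing zero bits, so the assert
// is sunk onto P and the node becomes (add (assertalign P, Align(16)), 32),
// exposing P's alignment to later combines.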
14213
14214 return SDValue();
14215}
14216
14217/// If the result of a load is shifted/masked/truncated to an effectively
14218/// narrower type, try to transform the load to a narrower type and/or
14219/// use an extending load.
14220SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14221 unsigned Opc = N->getOpcode();
14222
14224 SDValue N0 = N->getOperand(0);
14225 EVT VT = N->getValueType(0);
14226 EVT ExtVT = VT;
14227
14228 // This transformation isn't valid for vector loads.
14229 if (VT.isVector())
14230 return SDValue();
14231
14232 // The ShAmt variable is used to indicate that we've consumed a right
14233 // shift. I.e. we want to narrow the width of the load by skipping the
14234 // ShAmt least significant bits.
14235 unsigned ShAmt = 0;
14236 // A special case is when the least significant bits from the load are masked
14237 // away, but using an AND rather than a right shift. ShiftedOffset is used
14238 // to indicate that the narrowed load should be left-shifted ShiftedOffset
14239 // bits to get the result.
14240 unsigned ShiftedOffset = 0;
14241 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14242 // extended to VT.
14243 if (Opc == ISD::SIGN_EXTEND_INREG) {
14244 ExtType = ISD::SEXTLOAD;
14245 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14246 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14247 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14248 // value, or it may be shifting a higher subword, half or byte into the
14249 // lowest bits.
14250
14251 // Only handle shift with constant shift amount, and the shiftee must be a
14252 // load.
14253 auto *LN = dyn_cast<LoadSDNode>(N0);
14254 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14255 if (!N1C || !LN)
14256 return SDValue();
14257 // If the shift amount is larger than the memory type then we're not
14258 // accessing any of the loaded bytes.
14259 ShAmt = N1C->getZExtValue();
14260 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14261 if (MemoryWidth <= ShAmt)
14262 return SDValue();
14263 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14264 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14265 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14266 // If original load is a SEXTLOAD then we can't simply replace it by a
14267 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14268 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14269 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14270 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14271 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14272 LN->getExtensionType() != ExtType)
14273 return SDValue();
14274 } else if (Opc == ISD::AND) {
14275 // An AND with a constant mask is the same as a truncate + zero-extend.
14276 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14277 if (!AndC)
14278 return SDValue();
14279
14280 const APInt &Mask = AndC->getAPIntValue();
14281 unsigned ActiveBits = 0;
14282 if (Mask.isMask()) {
14283 ActiveBits = Mask.countr_one();
14284 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14285 ShiftedOffset = ShAmt;
14286 } else {
14287 return SDValue();
14288 }
14289
14290 ExtType = ISD::ZEXTLOAD;
14291 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14292 }
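// Illustration (hypothetical example): for (and (load i32 p), 0xFF00) the mask
// is a shifted mask with ShAmt = 8 and ActiveBits = 8, so ExtVT becomes i8 and
// ShiftedOffset = 8; the load is narrowed to a one-byte zextload (of p+1 on a
// little-endian target) and the result is shifted left by 8 afterwards.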
14293
14294 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14295 // a right shift. Here we redo some of those checks, to possibly adjust the
14296 // ExtVT even further based on "a masking AND". We could also end up here for
14297 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14298 // need to be done here as well.
14299 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14300 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14301 // Bail out when the SRL has more than one use. This is done for historical
14302 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14303 // check below? And maybe it could be non-profitable to do the transform in
14304 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14305 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
14306 if (!SRL.hasOneUse())
14307 return SDValue();
14308
14309 // Only handle shift with constant shift amount, and the shiftee must be a
14310 // load.
14311 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14312 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14313 if (!SRL1C || !LN)
14314 return SDValue();
14315
14316 // If the shift amount is larger than the input type then we're not
14317 // accessing any of the loaded bytes. If the load was a zextload/extload
14318 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14319 ShAmt = SRL1C->getZExtValue();
14320 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14321 if (ShAmt >= MemoryWidth)
14322 return SDValue();
14323
14324 // Because a SRL must be assumed to *need* to zero-extend the high bits
14325 // (as opposed to anyext the high bits), we can't combine the zextload
14326 // lowering of SRL and an sextload.
14327 if (LN->getExtensionType() == ISD::SEXTLOAD)
14328 return SDValue();
14329
14330 // Avoid reading outside the memory accessed by the original load (could
14331 // happen if we only adjust the load base pointer by ShAmt). Instead we
14332 // try to narrow the load even further. The typical scenario here is:
14333 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14334 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14335 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14336 // Don't replace sextload by zextload.
14337 if (ExtType == ISD::SEXTLOAD)
14338 return SDValue();
14339 // Narrow the load.
14340 ExtType = ISD::ZEXTLOAD;
14341 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14342 }
14343
14344 // If the SRL is only used by a masking AND, we may be able to adjust
14345 // the ExtVT to make the AND redundant.
14346 SDNode *Mask = *(SRL->use_begin());
14347 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14348 isa<ConstantSDNode>(Mask->getOperand(1))) {
14349 unsigned Offset, ActiveBits;
14350 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14351 if (ShiftMask.isMask()) {
14352 EVT MaskedVT =
14353 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14354 // If the mask is smaller, recompute the type.
14355 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14356 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14357 ExtVT = MaskedVT;
14358 } else if (ExtType == ISD::ZEXTLOAD &&
14359 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14360 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14361 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14362 // If the mask is shifted we can use a narrower load and a shl to insert
14363 // the trailing zeros.
14364 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14365 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14366 ExtVT = MaskedVT;
14367 ShAmt = Offset + ShAmt;
14368 ShiftedOffset = Offset;
14369 }
14370 }
14371 }
14372
14373 N0 = SRL.getOperand(0);
14374 }
14375
14376 // If the load is shifted left (and the result isn't shifted back right), we
14377 // can fold a truncate through the shift. The typical scenario is that N
14378 // points at a TRUNCATE here so the attempted fold is:
14379 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14380 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14381 unsigned ShLeftAmt = 0;
14382 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14383 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14384 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14385 ShLeftAmt = N01->getZExtValue();
14386 N0 = N0.getOperand(0);
14387 }
14388 }
14389
14390 // If we haven't found a load, we can't narrow it.
14391 if (!isa<LoadSDNode>(N0))
14392 return SDValue();
14393
14394 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14395 // Reducing the width of a volatile load is illegal. For atomics, we may be
14396 // able to reduce the width provided we never widen again. (see D66309)
14397 if (!LN0->isSimple() ||
14398 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14399 return SDValue();
14400
14401 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14402 unsigned LVTStoreBits =
14403 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14404 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14405 return LVTStoreBits - EVTStoreBits - ShAmt;
14406 };
14407
14408 // We need to adjust the pointer to the load by ShAmt bits in order to load
14409 // the correct bytes.
14410 unsigned PtrAdjustmentInBits =
14411 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14412
14413 uint64_t PtrOff = PtrAdjustmentInBits / 8;
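// Illustration (hypothetical example): narrowing an i32 load to an i8 zextload
// of bits [16,24) gives ShAmt = 16. Little endian uses PtrOff = 16/8 = 2, while
// big endian remaps the shift to 32 - 8 - 16 = 8, i.e. PtrOff = 1, so the same
// byte of memory is addressed under either layout.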
14414 SDLoc DL(LN0);
14415 // The original load itself didn't wrap, so an offset within it doesn't.
14416 SDNodeFlags Flags;
14417 Flags.setNoUnsignedWrap(true);
14418 SDValue NewPtr = DAG.getMemBasePlusOffset(
14419 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14420 AddToWorklist(NewPtr.getNode());
14421
14422 SDValue Load;
14423 if (ExtType == ISD::NON_EXTLOAD)
14424 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14425 LN0->getPointerInfo().getWithOffset(PtrOff),
14426 LN0->getOriginalAlign(),
14427 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14428 else
14429 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14430 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14431 LN0->getOriginalAlign(),
14432 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14433
14434 // Replace the old load's chain with the new load's chain.
14435 WorklistRemover DeadNodes(*this);
14436 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14437
14438 // Shift the result left, if we've swallowed a left shift.
14439 SDValue Result = Load;
14440 if (ShLeftAmt != 0) {
14441 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14442 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14443 ShImmTy = VT;
14444 // If the shift amount is as large as the result size (but, presumably,
14445 // no larger than the source) then the useful bits of the result are
14446 // zero; we can't simply return the shortened shift, because the result
14447 // of that operation is undefined.
14448 if (ShLeftAmt >= VT.getScalarSizeInBits())
14449 Result = DAG.getConstant(0, DL, VT);
14450 else
14451 Result = DAG.getNode(ISD::SHL, DL, VT,
14452 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14453 }
14454
14455 if (ShiftedOffset != 0) {
14456 // We're using a shifted mask, so the load now has an offset. This means
14457 // that the data has been loaded into lower bytes than it would have been
14458 // before, so we need to shl the loaded data into the correct position in the
14459 // register.
14460 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14461 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14463 }
14464
14465 // Return the new loaded value.
14466 return Result;
14467}
14468
14469SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14470 SDValue N0 = N->getOperand(0);
14471 SDValue N1 = N->getOperand(1);
14472 EVT VT = N->getValueType(0);
14473 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14474 unsigned VTBits = VT.getScalarSizeInBits();
14475 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14476
14477 // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
14478 if (N0.isUndef())
14479 return DAG.getConstant(0, SDLoc(N), VT);
14480
14481 // fold (sext_in_reg c1) -> c1
14482 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14483 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14484
14485 // If the input is already sign extended, just drop the extension.
14486 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14487 return N0;
14488
14489 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14490 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14491 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14492 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14493 N1);
14494
14495 // fold (sext_in_reg (sext x)) -> (sext x)
14496 // fold (sext_in_reg (aext x)) -> (sext x)
14497 // if x is small enough or if we know that x has more than 1 sign bit and the
14498 // sign_extend_inreg is extending from one of them.
14499 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14500 SDValue N00 = N0.getOperand(0);
14501 unsigned N00Bits = N00.getScalarValueSizeInBits();
14502 if ((N00Bits <= ExtVTBits ||
14503 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14504 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14505 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14506 }
14507
14508 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14509 // if x is small enough or if we know that x has more than 1 sign bit and the
14510 // sign_extend_inreg is extending from one of them.
14511 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14512 SDValue N00 = N0.getOperand(0);
14513 unsigned N00Bits = N00.getScalarValueSizeInBits();
14514 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14515 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14516 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14517 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14518 if ((N00Bits == ExtVTBits ||
14519 (!IsZext && (N00Bits < ExtVTBits ||
14520 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14521 (!LegalOperations ||
14522 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14523 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14524 }
14525
14526 // fold (sext_in_reg (zext x)) -> (sext x)
14527 // iff we are extending the source sign bit.
14528 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14529 SDValue N00 = N0.getOperand(0);
14530 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14531 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14532 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14533 }
14534
14535 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14536 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14537 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14538
14539 // fold operands of sext_in_reg based on knowledge that the top bits are not
14540 // demanded.
14541 if (SimplifyDemandedBits(SDValue(N, 0)))
14542 return SDValue(N, 0);
14543
14544 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14545 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14546 if (SDValue NarrowLoad = reduceLoadWidth(N))
14547 return NarrowLoad;
14548
14549 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14550 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14551 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14552 if (N0.getOpcode() == ISD::SRL) {
14553 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14554 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14555 // We can turn this into an SRA iff the input to the SRL is already sign
14556 // extended enough.
14557 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14558 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14559 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14560 N0.getOperand(1));
14561 }
14562 }
14563
14564 // fold (sext_inreg (extload x)) -> (sextload x)
14565 // If sextload is not supported by target, we can only do the combine when
14566 // load has one use. Doing otherwise can block folding the extload with other
14567 // extends that the target does support.
14568 if (ISD::isEXTLoad(N0.getNode()) &&
14569 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14570 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14571 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14572 N0.hasOneUse()) ||
14573 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14574 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14575 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14576 LN0->getChain(),
14577 LN0->getBasePtr(), ExtVT,
14578 LN0->getMemOperand());
14579 CombineTo(N, ExtLoad);
14580 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14581 AddToWorklist(ExtLoad.getNode());
14582 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14583 }
14584
14585 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14586 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14587 N0.hasOneUse() &&
14588 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14589 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14590 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14591 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14592 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14593 LN0->getChain(),
14594 LN0->getBasePtr(), ExtVT,
14595 LN0->getMemOperand());
14596 CombineTo(N, ExtLoad);
14597 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14598 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14599 }
14600
14601 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14602 // ignore it if the masked load is already sign extended
14603 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14604 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14605 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14606 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14607 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14608 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14609 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14610 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14611 CombineTo(N, ExtMaskedLoad);
14612 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14613 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14614 }
14615 }
14616
14617 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14618 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14619 if (SDValue(GN0, 0).hasOneUse() &&
14620 ExtVT == GN0->getMemoryVT() &&
14621 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
14622 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14623 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14624
14625 SDValue ExtLoad = DAG.getMaskedGather(
14626 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14627 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14628
14629 CombineTo(N, ExtLoad);
14630 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14631 AddToWorklist(ExtLoad.getNode());
14632 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14633 }
14634 }
14635
14636 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14637 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14638 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14639 N0.getOperand(1), false))
14640 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14641 }
14642
14643 // Fold (iM_signext_inreg
14644 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14645 // from iN)
14646 // -> (extract_subvector (signext iN_v to iM))
14647 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14648 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14649 SDValue InnerExt = N0.getOperand(0);
14650 EVT InnerExtVT = InnerExt->getValueType(0);
14651 SDValue Extendee = InnerExt->getOperand(0);
14652
14653 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14654 (!LegalOperations ||
14655 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14656 SDValue SignExtExtendee =
14657 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14658 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14659 N0.getOperand(1));
14660 }
14661 }
14662
14663 return SDValue();
14664}
14665
14666 static SDValue foldExtendVectorInregToExtendOfSubvector(
14667 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14668 bool LegalOperations) {
14669 unsigned InregOpcode = N->getOpcode();
14670 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14671
14672 SDValue Src = N->getOperand(0);
14673 EVT VT = N->getValueType(0);
14674 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14675 Src.getValueType().getVectorElementType(),
14676 VT.getVectorElementCount());
14677
14678 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14679 "Expected EXTEND_VECTOR_INREG dag node in input!");
14680
14681 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14682 // FIXME: one-use check may be overly restrictive
14683 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14684 return SDValue();
14685
14686 // Profitability check: we must be extending exactly one of its operands.
14687 // FIXME: this is probably overly restrictive.
14688 Src = Src.getOperand(0);
14689 if (Src.getValueType() != SrcVT)
14690 return SDValue();
14691
14692 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14693 return SDValue();
14694
14695 return DAG.getNode(Opcode, DL, VT, Src);
14696}
14697
14698SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14699 SDValue N0 = N->getOperand(0);
14700 EVT VT = N->getValueType(0);
14701 SDLoc DL(N);
14702
14703 if (N0.isUndef()) {
14704 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14705 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14706 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14707 ? DAG.getUNDEF(VT)
14708 : DAG.getConstant(0, DL, VT);
14709 }
14710
14711 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14712 return Res;
14713
14714 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14715 return SDValue(N, 0);
14716
14717 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14718 LegalOperations))
14719 return R;
14720
14721 return SDValue();
14722}
14723
14724SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14725 SDValue N0 = N->getOperand(0);
14726 EVT VT = N->getValueType(0);
14727 EVT SrcVT = N0.getValueType();
14728 bool isLE = DAG.getDataLayout().isLittleEndian();
14729 SDLoc DL(N);
14730
14731 // trunc(undef) = undef
14732 if (N0.isUndef())
14733 return DAG.getUNDEF(VT);
14734
14735 // fold (truncate (truncate x)) -> (truncate x)
14736 if (N0.getOpcode() == ISD::TRUNCATE)
14737 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14738
14739 // fold (truncate c1) -> c1
14740 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14741 return C;
14742
14743 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14744 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14745 N0.getOpcode() == ISD::SIGN_EXTEND ||
14746 N0.getOpcode() == ISD::ANY_EXTEND) {
14747 // if the source is smaller than the dest, we still need an extend.
14748 if (N0.getOperand(0).getValueType().bitsLT(VT))
14749 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14750 // if the source is larger than the dest, then we just need the truncate.
14751 if (N0.getOperand(0).getValueType().bitsGT(VT))
14752 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14753 // if the source and dest are the same type, we can drop both the extend
14754 // and the truncate.
14755 return N0.getOperand(0);
14756 }
14757
14758 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14759 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14760 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14761 N0.hasOneUse()) {
14762 SDValue X = N0.getOperand(0);
14763 SDValue ExtVal = N0.getOperand(1);
14764 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14765 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14766 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14767 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14768 }
14769 }
14770
14771 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14772 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14773 return SDValue();
14774
14775 // Fold extract-and-trunc into a narrow extract. For example:
14776 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14777 // i32 y = TRUNCATE(i64 x)
14778 // -- becomes --
14779 // v16i8 b = BITCAST (v2i64 val)
14780 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14781 //
14782 // Note: We only run this optimization after type legalization (which often
14783 // creates this pattern) and before operation legalization after which
14784 // we need to be more careful about the vector instructions that we generate.
14785 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14786 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14787 EVT VecTy = N0.getOperand(0).getValueType();
14788 EVT ExTy = N0.getValueType();
14789 EVT TrTy = N->getValueType(0);
14790
14791 auto EltCnt = VecTy.getVectorElementCount();
14792 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14793 auto NewEltCnt = EltCnt * SizeRatio;
14794
14795 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14796 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14797
14798 SDValue EltNo = N0->getOperand(1);
14799 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14800 int Elt = EltNo->getAsZExtVal();
14801 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14802 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14803 DAG.getBitcast(NVT, N0.getOperand(0)),
14804 DAG.getVectorIdxConstant(Index, DL));
14805 }
14806 }
14807
14808 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14809 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14810 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14811 TLI.isTruncateFree(SrcVT, VT)) {
14812 SDLoc SL(N0);
14813 SDValue Cond = N0.getOperand(0);
14814 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14815 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14816 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
14817 }
14818 }
14819
14820 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
14821 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14822 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14823 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14824 SDValue Amt = N0.getOperand(1);
14825 KnownBits Known = DAG.computeKnownBits(Amt);
14826 unsigned Size = VT.getScalarSizeInBits();
14827 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14828 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14829 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14830 if (AmtVT != Amt.getValueType()) {
14831 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
14832 AddToWorklist(Amt.getNode());
14833 }
14834 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
14835 }
14836 }
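// Illustration (hypothetical example): (trunc (shl X:i64, 3) to i32) becomes
// (shl (trunc X to i32), 3) because the shift amount is known to be below 32,
// so the low 32 bits of the result depend only on the low 32 bits of X.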
14837
14838 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
14839 return V;
14840
14841 if (SDValue ABD = foldABSToABD(N, DL))
14842 return ABD;
14843
14844 // Attempt to pre-truncate BUILD_VECTOR sources.
14845 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14846 N0.hasOneUse() &&
14847 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14848 // Avoid creating illegal types if running after type legalizer.
14849 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14850 EVT SVT = VT.getScalarType();
14851 SmallVector<SDValue, 8> TruncOps;
14852 for (const SDValue &Op : N0->op_values()) {
14853 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14854 TruncOps.push_back(TruncOp);
14855 }
14856 return DAG.getBuildVector(VT, DL, TruncOps);
14857 }
14858
14859 // trunc (splat_vector x) -> splat_vector (trunc x)
14860 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14861 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14862 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14863 EVT SVT = VT.getScalarType();
14864 return DAG.getSplatVector(
14865 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14866 }
14867
14868 // Fold a series of buildvector, bitcast, and truncate if possible.
14869 // For example fold
14870 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14871 // (2xi32 (buildvector x, y)).
14872 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14873 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14874 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14875 N0.getOperand(0).hasOneUse()) {
14876 SDValue BuildVect = N0.getOperand(0);
14877 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14878 EVT TruncVecEltTy = VT.getVectorElementType();
14879
14880 // Check that the element types match.
14881 if (BuildVectEltTy == TruncVecEltTy) {
14882 // Now we only need to compute the offset of the truncated elements.
14883 unsigned BuildVecNumElts = BuildVect.getNumOperands();
14884 unsigned TruncVecNumElts = VT.getVectorNumElements();
14885 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14886
14887 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14888 "Invalid number of elements");
14889
14890 SmallVector<SDValue, 8> Opnds;
14891 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14892 Opnds.push_back(BuildVect.getOperand(i));
14893
14894 return DAG.getBuildVector(VT, DL, Opnds);
14895 }
14896 }
14897
14898 // fold (truncate (load x)) -> (smaller load x)
14899 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14900 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14901 if (SDValue Reduced = reduceLoadWidth(N))
14902 return Reduced;
14903
14904 // Handle the case where the truncated result is at least as wide as the
14905 // loaded type.
14906 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14907 auto *LN0 = cast<LoadSDNode>(N0);
14908 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14909 SDValue NewLoad = DAG.getExtLoad(
14910 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14911 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14912 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14913 return NewLoad;
14914 }
14915 }
14916 }
14917
14918 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
14919 // where ... are all 'undef'.
14920 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14921 SmallVector<EVT, 8> VTs;
14922 SDValue V;
14923 unsigned Idx = 0;
14924 unsigned NumDefs = 0;
14925
14926 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14927 SDValue X = N0.getOperand(i);
14928 if (!X.isUndef()) {
14929 V = X;
14930 Idx = i;
14931 NumDefs++;
14932 }
14933 // Stop if more than one member is non-undef.
14934 if (NumDefs > 1)
14935 break;
14936
14937 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14938 X.getValueType().getVectorElementType(),
14939 X.getValueType().getVectorElementCount()));
14940 }
14941
14942 if (NumDefs == 0)
14943 return DAG.getUNDEF(VT);
14944
14945 if (NumDefs == 1) {
14946 assert(V.getNode() && "The single defined operand is empty!");
14947 SmallVector<SDValue, 8> Opnds;
14948 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14949 if (i != Idx) {
14950 Opnds.push_back(DAG.getUNDEF(VTs[i]));
14951 continue;
14952 }
14953 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14954 AddToWorklist(NV.getNode());
14955 Opnds.push_back(NV);
14956 }
14957 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
14958 }
14959 }
14960
14961 // Fold truncate of a bitcast of a vector to an extract of the low vector
14962 // element.
14963 //
14964 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14965 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14966 SDValue VecSrc = N0.getOperand(0);
14967 EVT VecSrcVT = VecSrc.getValueType();
14968 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14969 (!LegalOperations ||
14970 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14971 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14972 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
14973 DAG.getVectorIdxConstant(Idx, DL));
14974 }
14975 }
14976
14977 // Simplify the operands using demanded-bits information.
14978 if (SimplifyDemandedBits(SDValue(N, 0)))
14979 return SDValue(N, 0);
14980
14981 // fold (truncate (extract_subvector(ext x))) ->
14982 // (extract_subvector x)
14983 // TODO: This can be generalized to cover cases where the truncate and extract
14984 // do not fully cancel each other out.
14985 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14986 SDValue N00 = N0.getOperand(0);
14987 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14988 N00.getOpcode() == ISD::ZERO_EXTEND ||
14989 N00.getOpcode() == ISD::ANY_EXTEND) {
14990 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14991 VT.getVectorElementType())
14992 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14993 N00.getOperand(0), N0.getOperand(1));
14994 }
14995 }
14996
14997 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14998 return NewVSel;
14999
15000 // Narrow a suitable binary operation with a non-opaque constant operand by
15001 // moving it ahead of the truncate. This is limited to pre-legalization
15002 // because targets may prefer a wider type during later combines and invert
15003 // this transform.
15004 switch (N0.getOpcode()) {
15005 case ISD::ADD:
15006 case ISD::SUB:
15007 case ISD::MUL:
15008 case ISD::AND:
15009 case ISD::OR:
15010 case ISD::XOR:
15011 if (!LegalOperations && N0.hasOneUse() &&
15012 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15013 isConstantOrConstantVector(N0.getOperand(1), true))) {
15014 // TODO: We already restricted this to pre-legalization, but for vectors
15015 // we are extra cautious to not create an unsupported operation.
15016 // Target-specific changes are likely needed to avoid regressions here.
15017 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15018 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15019 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15020 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15021 }
15022 }
15023 break;
15024 case ISD::ADDE:
15025 case ISD::UADDO_CARRY:
15026 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15027 // (trunc uaddo_carry(X, Y, Carry)) ->
15028 // (uaddo_carry trunc(X), trunc(Y), Carry)
15029 // When the adde's carry is not used.
15030 // We only do for uaddo_carry before legalize operation
15031 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15032 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15033 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15034 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15035 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15036 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15037 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15038 }
15039 break;
15040 case ISD::USUBSAT:
15041 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15042 // enough to know that the upper bits are zero, we must ensure that we don't
15043 // introduce an extra truncate.
15044 if (!LegalOperations && N0.hasOneUse() &&
15045 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15046 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15047 VT.getScalarSizeInBits() &&
15048 hasOperation(N0.getOpcode(), VT)) {
15049 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15050 DAG, DL);
15051 }
15052 break;
15053 }
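// Illustration (hypothetical example) for the ADD/SUB/MUL/AND/OR/XOR narrowing
// above: before legalization, (trunc (add X:i64, 42) to i32) is rewritten as
// (add (trunc X to i32), 42), so the arithmetic and the constant move into the
// narrow type.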
15054
15055 return SDValue();
15056}
15057
15058static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15059 SDValue Elt = N->getOperand(i);
15060 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15061 return Elt.getNode();
15062 return Elt.getOperand(Elt.getResNo()).getNode();
15063}
15064
15065/// build_pair (load, load) -> load
15066/// if load locations are consecutive.
15067SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15068 assert(N->getOpcode() == ISD::BUILD_PAIR);
15069
15070 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15071 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15072
15073 // A BUILD_PAIR always has the least significant part in elt 0 and the
15074 // most significant part in elt 1. So when combining into one large load, we
15075 // need to consider the endianness.
15076 if (DAG.getDataLayout().isBigEndian())
15077 std::swap(LD1, LD2);
15078
15079 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15080 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15081 LD1->getAddressSpace() != LD2->getAddressSpace())
15082 return SDValue();
15083
15084 unsigned LD1Fast = 0;
15085 EVT LD1VT = LD1->getValueType(0);
15086 unsigned LD1Bytes = LD1VT.getStoreSize();
15087 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15088 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15089 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15090 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15091 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15092 LD1->getPointerInfo(), LD1->getAlign());
15093
15094 return SDValue();
15095}
15096
15097static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15098 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15099 // and Lo parts; on big-endian machines it doesn't.
15100 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15101}
15102
15103SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15104 const TargetLowering &TLI) {
15105 // If this is not a bitcast to an FP type or if the target doesn't have
15106 // IEEE754-compliant FP logic, we're done.
15107 EVT VT = N->getValueType(0);
15108 SDValue N0 = N->getOperand(0);
15109 EVT SourceVT = N0.getValueType();
15110
15111 if (!VT.isFloatingPoint())
15112 return SDValue();
15113
15114 // TODO: Handle cases where the integer constant is a different scalar
15115 // bitwidth to the FP.
15116 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15117 return SDValue();
15118
15119 unsigned FPOpcode;
15120 APInt SignMask;
15121 switch (N0.getOpcode()) {
15122 case ISD::AND:
15123 FPOpcode = ISD::FABS;
15124 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15125 break;
15126 case ISD::XOR:
15127 FPOpcode = ISD::FNEG;
15128 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15129 break;
15130 case ISD::OR:
15131 FPOpcode = ISD::FABS;
15132 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15133 break;
15134 default:
15135 return SDValue();
15136 }
15137
15138 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15139 return SDValue();
15140
15141 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15142 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15143 // removing this would require more changes.
15144 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15145 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15146 return true;
15147
15148 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15149 };
15150
15151 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15152 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15153 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15154 // fneg (fabs X)
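// For example, for f32/i32 the sign mask is 0x80000000, so:
//   (f32 bitcast (and (i32 bitcast X), 0x7fffffff)) -> (fabs X)
//   (f32 bitcast (xor (i32 bitcast X), 0x80000000)) -> (fneg X)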
15155 SDValue LogicOp0 = N0.getOperand(0);
15156 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15157 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15158 IsBitCastOrFree(LogicOp0, VT)) {
15159 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15160 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15161 NumFPLogicOpsConv++;
15162 if (N0.getOpcode() == ISD::OR)
15163 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15164 return FPOp;
15165 }
15166
15167 return SDValue();
15168}
15169
15170SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15171 SDValue N0 = N->getOperand(0);
15172 EVT VT = N->getValueType(0);
15173
15174 if (N0.isUndef())
15175 return DAG.getUNDEF(VT);
15176
15177 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15178 // Only do this before legalize types, unless both types are integer and the
15179 // scalar type is legal. Only do this before legalize ops, since the target
15180 may be depending on the bitcast.
15181 // First check to see if this is all constant.
15182 // TODO: Support FP bitcasts after legalize types.
15183 if (VT.isVector() &&
15184 (!LegalTypes ||
15185 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15186 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15187 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15188 cast<BuildVectorSDNode>(N0)->isConstant())
15189 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15190 VT.getVectorElementType());
15191
15192 // If the input is a constant, let getNode fold it.
15193 if (isIntOrFPConstant(N0)) {
15194 // If we can't allow illegal operations, we need to check that this is just
15195 // an fp -> int or int -> fp conversion and that the resulting operation will
15196 // be legal.
15197 if (!LegalOperations ||
15198 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15199 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15200 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15201 TLI.isOperationLegal(ISD::Constant, VT))) {
15202 SDValue C = DAG.getBitcast(VT, N0);
15203 if (C.getNode() != N)
15204 return C;
15205 }
15206 }
15207
15208 // (conv (conv x, t1), t2) -> (conv x, t2)
15209 if (N0.getOpcode() == ISD::BITCAST)
15210 return DAG.getBitcast(VT, N0.getOperand(0));
15211
15212 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15213 // iff the current bitwise logicop type isn't legal
15214 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15215 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15216 auto IsFreeBitcast = [VT](SDValue V) {
15217 return (V.getOpcode() == ISD::BITCAST &&
15218 V.getOperand(0).getValueType() == VT) ||
15219 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15220 V->hasOneUse());
15221 };
15222 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15223 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15224 DAG.getBitcast(VT, N0.getOperand(0)),
15225 DAG.getBitcast(VT, N0.getOperand(1)));
15226 }
15227
15228 // fold (conv (load x)) -> (load (conv*)x)
15229 // If the resultant load doesn't need a higher alignment than the original!
15230 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15231 // Do not remove the cast if the types differ in endian layout.
15232 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15233 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15234 // If the load is volatile, we only want to change the load type if the
15235 // resulting load is legal. Otherwise we might increase the number of
15236 // memory accesses. We don't care if the original type was legal or not
15237 // as we assume software couldn't rely on the number of accesses of an
15238 // illegal type.
15239 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15240 TLI.isOperationLegal(ISD::LOAD, VT))) {
15241 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15242
15243 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15244 *LN0->getMemOperand())) {
15245 SDValue Load =
15246 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15247 LN0->getMemOperand());
15248 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15249 return Load;
15250 }
15251 }
15252
15253 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15254 return V;
15255
15256 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15257 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15258 //
15259 // For ppc_fp128:
15260 // fold (bitcast (fneg x)) ->
15261 // flipbit = signbit
15262 // (xor (bitcast x) (build_pair flipbit, flipbit))
15263 //
15264 // fold (bitcast (fabs x)) ->
15265 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15266 // (xor (bitcast x) (build_pair flipbit, flipbit))
15267 // This often reduces constant pool loads.
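// For example, for a scalar f64 bitcast to i64:
//   (i64 bitcast (fneg X)) -> (xor (i64 bitcast X), 0x8000000000000000)
//   (i64 bitcast (fabs X)) -> (and (i64 bitcast X), 0x7fffffffffffffff)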
15268 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15269 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15270 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15271 !N0.getValueType().isVector()) {
15272 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15273 AddToWorklist(NewConv.getNode());
15274
15275 SDLoc DL(N);
15276 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15277 assert(VT.getSizeInBits() == 128);
15278 SDValue SignBit = DAG.getConstant(
15279 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15280 SDValue FlipBit;
15281 if (N0.getOpcode() == ISD::FNEG) {
15282 FlipBit = SignBit;
15283 AddToWorklist(FlipBit.getNode());
15284 } else {
15285 assert(N0.getOpcode() == ISD::FABS);
15286 SDValue Hi =
15287 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15288 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15289 SDLoc(NewConv)));
15290 AddToWorklist(Hi.getNode());
15291 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15292 AddToWorklist(FlipBit.getNode());
15293 }
15294 SDValue FlipBits =
15295 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15296 AddToWorklist(FlipBits.getNode());
15297 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15298 }
15299 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15300 if (N0.getOpcode() == ISD::FNEG)
15301 return DAG.getNode(ISD::XOR, DL, VT,
15302 NewConv, DAG.getConstant(SignBit, DL, VT));
15303 assert(N0.getOpcode() == ISD::FABS);
15304 return DAG.getNode(ISD::AND, DL, VT,
15305 NewConv, DAG.getConstant(~SignBit, DL, VT));
15306 }
15307
15308 // fold (bitconvert (fcopysign cst, x)) ->
15309 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15310 // Note that we don't handle (copysign x, cst) because this can always be
15311 // folded to an fneg or fabs.
15312 //
15313 // For ppc_fp128:
15314 // fold (bitcast (fcopysign cst, x)) ->
15315 // flipbit = (and (extract_element
15316 // (xor (bitcast cst), (bitcast x)), 0),
15317 // signbit)
15318 // (xor (bitcast cst) (build_pair flipbit, flipbit))
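// For example, with f32/i32 (2.0f has the bit pattern 0x40000000):
//   (i32 bitcast (fcopysign 2.0, X))
//     -> (or (and (i32 bitcast X), 0x80000000), 0x40000000)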
15319 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15320 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15321 !VT.isVector()) {
15322 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15323 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15324 if (isTypeLegal(IntXVT)) {
15325 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15326 AddToWorklist(X.getNode());
15327
15328 // If X has a different width than the result/lhs, sext it or truncate it.
15329 unsigned VTWidth = VT.getSizeInBits();
15330 if (OrigXWidth < VTWidth) {
15331 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15332 AddToWorklist(X.getNode());
15333 } else if (OrigXWidth > VTWidth) {
15334 // To get the sign bit in the right place, we have to shift it right
15335 // before truncating.
15336 SDLoc DL(X);
15337 X = DAG.getNode(ISD::SRL, DL,
15338 X.getValueType(), X,
15339 DAG.getConstant(OrigXWidth-VTWidth, DL,
15340 X.getValueType()));
15341 AddToWorklist(X.getNode());
15342 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15343 AddToWorklist(X.getNode());
15344 }
15345
15346 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15347 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15348 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15349 AddToWorklist(Cst.getNode());
15350 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15351 AddToWorklist(X.getNode());
15352 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15353 AddToWorklist(XorResult.getNode());
15354 SDValue XorResult64 = DAG.getNode(
15355 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15356 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15357 SDLoc(XorResult)));
15358 AddToWorklist(XorResult64.getNode());
15359 SDValue FlipBit =
15360 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15361 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15362 AddToWorklist(FlipBit.getNode());
15363 SDValue FlipBits =
15364 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15365 AddToWorklist(FlipBits.getNode());
15366 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15367 }
15368 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15369 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15370 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15371 AddToWorklist(X.getNode());
15372
15373 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15374 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15375 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15376 AddToWorklist(Cst.getNode());
15377
15378 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15379 }
15380 }
15381
15382 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15383 if (N0.getOpcode() == ISD::BUILD_PAIR)
15384 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15385 return CombineLD;
15386
15387 // Remove double bitcasts from shuffles - this is often a legacy of
15388 // XformToShuffleWithZero being used to combine bitmaskings (of
15389 // float vectors bitcast to integer vectors) into shuffles.
15390 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
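// For example, a shuffle of two v2i64 operands bitcast to v4i32 has its mask
// widened by MaskScale = 2, e.g. mask <1,0> becomes <2,3,0,1>.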
15391 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15392 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15393 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15394 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15395 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15396
15397 // If operands are a bitcast, peek through if it casts the original VT.
15398 // If operands are a constant, just bitcast back to original VT.
15399 auto PeekThroughBitcast = [&](SDValue Op) {
15400 if (Op.getOpcode() == ISD::BITCAST &&
15401 Op.getOperand(0).getValueType() == VT)
15402 return SDValue(Op.getOperand(0));
15403 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15404 return DAG.getBitcast(VT, Op);
15405 return SDValue();
15406 };
15407
15408 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15409 // the result type of this bitcast. This would eliminate at least one
15410 // bitcast. See the transform in InstCombine.
15411 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15412 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15413 if (!(SV0 && SV1))
15414 return SDValue();
15415
15416 int MaskScale =
15417 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15418 SmallVector<int, 8> NewMask;
15419 for (int M : SVN->getMask())
15420 for (int i = 0; i != MaskScale; ++i)
15421 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15422
15423 SDValue LegalShuffle =
15424 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15425 if (LegalShuffle)
15426 return LegalShuffle;
15427 }
15428
15429 return SDValue();
15430}
15431
15432SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15433 EVT VT = N->getValueType(0);
15434 return CombineConsecutiveLoads(N, VT);
15435}
15436
15437SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15438 SDValue N0 = N->getOperand(0);
15439
15440 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15441 return N0;
15442
15443 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15444 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15445 // example https://reviews.llvm.org/D136529#4120959.
15446 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15447 return SDValue();
15448
15449 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15450 // Try to push freeze through instructions that propagate but don't produce
15451 // poison as far as possible. If an operand of freeze satisfies three conditions:
15452 // 1) it has one use, 2) it does not produce poison, and 3) all but one of its
15453 // operands are guaranteed non-poison (or it is a BUILD_VECTOR or similar), then
15454 // push the freeze through to the operands that are not guaranteed non-poison.
15455 // NOTE: we will strip poison-generating flags, so ignore them here.
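// For example, with a single maybe-poison operand x and a constant c:
//   freeze(add(x, c)) -> add(freeze(x), c)
// with any poison-generating flags (e.g. nuw/nsw) dropped from the add.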
15456 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15457 /*ConsiderFlags*/ false) ||
15458 N0->getNumValues() != 1 || !N0->hasOneUse())
15459 return SDValue();
15460
15461 bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15462 N0.getOpcode() == ISD::BUILD_PAIR ||
15463 N0.getOpcode() == ISD::CONCAT_VECTORS;
15464
15465 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15466 // ones" or "constant" into something that depends on FrozenUndef. We can
15467 // instead pick undef values to keep those properties, while at the same time
15468 // folding away the freeze.
15469 // If we implement a more general solution for folding away freeze(undef) in
15470 // the future, then this special handling can be removed.
15471 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15472 SDLoc DL(N0);
15473 EVT VT = N0.getValueType();
15474 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15475 return DAG.getAllOnesConstant(DL, VT);
15476 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15477 SmallVector<SDValue, 8> NewVecC;
15478 for (const SDValue &Op : N0->op_values())
15479 NewVecC.push_back(
15480 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15481 return DAG.getBuildVector(VT, DL, NewVecC);
15482 }
15483 }
15484
15485 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15486 for (SDValue Op : N0->ops()) {
15487 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15488 /*Depth*/ 1))
15489 continue;
15490 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15491 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15492 if (!HadMaybePoisonOperands)
15493 continue;
15494 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15495 // Multiple maybe-poison ops when not allowed - bail out.
15496 return SDValue();
15497 }
15498 }
15499 // NOTE: the whole op may be not guaranteed to not be undef or poison because
15500 // it could create undef or poison due to its poison-generating flags.
15501 // So not finding any maybe-poison operands is fine.
15502
15503 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15504 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15505 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15506 continue;
15507 // First, freeze each offending operand.
15508 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15509 // Then, change all other uses of unfrozen operand to use frozen operand.
15510 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15511 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15512 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15513 // But, that also updated the use in the freeze we just created, thus
15514 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15515 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15516 MaybePoisonOperand);
15517 }
15518 }
15519
15520 // This node has been merged with another.
15521 if (N->getOpcode() == ISD::DELETED_NODE)
15522 return SDValue(N, 0);
15523
15524 // The whole node may have been updated, so the value we were holding
15525 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15526 N0 = N->getOperand(0);
15527
15528 // Finally, recreate the node; its operands were updated to use
15529 // frozen operands, so we just need to use its "original" operands.
15530 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15531 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
15532 for (SDValue &Op : Ops) {
15533 if (Op.getOpcode() == ISD::UNDEF)
15534 Op = DAG.getFreeze(Op);
15535 }
15536 // NOTE: this strips poison generating flags.
15537 SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15538 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15539 "Can't create node that may be undef/poison!");
15540 return R;
15541}
15542
15543/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15544/// operands. DstEltVT indicates the destination element value type.
15545SDValue DAGCombiner::
15546ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15547 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15548
15549 // If this is already the right type, we're done.
15550 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15551
15552 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15553 unsigned DstBitSize = DstEltVT.getSizeInBits();
15554
15555 // If this is a conversion of N elements of one type to N elements of another
15556 // type, convert each element. This handles FP<->INT cases.
15557 if (SrcBitSize == DstBitSize) {
15558 SmallVector<SDValue, 8> Ops;
15559 for (SDValue Op : BV->op_values()) {
15560 // If the vector element type is not legal, the BUILD_VECTOR operands
15561 // are promoted and implicitly truncated. Make that explicit here.
15562 if (Op.getValueType() != SrcEltVT)
15563 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15564 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15565 AddToWorklist(Ops.back().getNode());
15566 }
15567 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15568 BV->getValueType(0).getVectorNumElements());
15569 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15570 }
15571
15572 // Otherwise, we're growing or shrinking the elements. To avoid having to
15573 // handle annoying details of growing/shrinking FP values, we convert them to
15574 // int first.
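// For example, bitcasting a constant v4i16 to v2i32 packs each pair of 16-bit
// elements into one 32-bit element (in endian-dependent order).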
15575 if (SrcEltVT.isFloatingPoint()) {
15576 // Convert the input float vector to an int vector where the elements are the
15577 // same size.
15578 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15579 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15580 SrcEltVT = IntVT;
15581 }
15582
15583 // Now we know the input is an integer vector. If the output is a FP type,
15584 // convert to integer first, then to FP of the right size.
15585 if (DstEltVT.isFloatingPoint()) {
15586 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15587 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15588
15589 // Next, convert to FP elements of the same size.
15590 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15591 }
15592
15593 // Okay, we know the src/dst types are both integers of differing types.
15594 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15595
15596 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15597 // BuildVectorSDNode?
15598 auto *BVN = cast<BuildVectorSDNode>(BV);
15599
15600 // Extract the constant raw bit data.
15601 BitVector UndefElements;
15602 SmallVector<APInt> RawBits;
15603 bool IsLE = DAG.getDataLayout().isLittleEndian();
15604 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15605 return SDValue();
15606
15607 SDLoc DL(BV);
15608 SmallVector<SDValue, 8> Ops;
15609 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15610 if (UndefElements[I])
15611 Ops.push_back(DAG.getUNDEF(DstEltVT));
15612 else
15613 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15614 }
15615
15616 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15617 return DAG.getBuildVector(VT, DL, Ops);
15618}
15619
15620// Returns true if floating point contraction is allowed on the FMUL-SDValue
15621 // `N`.
15622 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15623 assert(N.getOpcode() == ISD::FMUL);
15624
15625 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15626 N->getFlags().hasAllowContract();
15627}
15628
15629 // Returns true if `N` can assume no infinities involved in its computation.
15630 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15631 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15632}
15633
15634/// Try to perform FMA combining on a given FADD node.
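/// For example, when contraction is allowed:
///   (fadd (fmul x, y), z) -> (fma x, y, z)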
15635template <class MatchContextClass>
15636SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15637 SDValue N0 = N->getOperand(0);
15638 SDValue N1 = N->getOperand(1);
15639 EVT VT = N->getValueType(0);
15640 SDLoc SL(N);
15641 MatchContextClass matcher(DAG, TLI, N);
15642 const TargetOptions &Options = DAG.getTarget().Options;
15643
15644 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15645
15646 // Floating-point multiply-add with intermediate rounding.
15647 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15648 // FIXME: Add VP_FMAD opcode.
15649 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15650
15651 // Floating-point multiply-add without intermediate rounding.
15652 bool HasFMA =
15653 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15654 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15655
15656 // No valid opcode, do not combine.
15657 if (!HasFMAD && !HasFMA)
15658 return SDValue();
15659
15660 bool CanReassociate =
15661 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15662 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15663 Options.UnsafeFPMath || HasFMAD);
15664 // If the addition is not contractable, do not combine.
15665 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15666 return SDValue();
15667
15668 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15669 // beneficial. It does not reduce latency. It increases register pressure. It
15670 // replaces an fadd with an fma which is a more complex instruction, so is
15671 // likely to have a larger encoding, use more functional units, etc.
15672 if (N0 == N1)
15673 return SDValue();
15674
15675 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15676 return SDValue();
15677
15678 // Always prefer FMAD to FMA for precision.
15679 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15680 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15681
15682 auto isFusedOp = [&](SDValue N) {
15683 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15684 };
15685
15686 // Is the node an FMUL and contractable either due to global flags or
15687 // SDNodeFlags.
15688 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15689 if (!matcher.match(N, ISD::FMUL))
15690 return false;
15691 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15692 };
15693 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15694 // prefer to fold the multiply with fewer uses.
15695 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
15696 if (N0->use_size() > N1->use_size())
15697 std::swap(N0, N1);
15698 }
15699
15700 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15701 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15702 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15703 N0.getOperand(1), N1);
15704 }
15705
15706 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15707 // Note: Commutes FADD operands.
15708 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15709 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15710 N1.getOperand(1), N0);
15711 }
15712
15713 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15714 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15715 // This also works with nested fma instructions:
15716 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15717 // fma A, B, (fma C, D, fma (E, F, G))
15718 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15719 // fma A, B, (fma C, D, fma (E, F, G)).
15720 // This requires reassociation because it changes the order of operations.
15721 if (CanReassociate) {
15722 SDValue FMA, E;
15723 if (isFusedOp(N0) && N0.hasOneUse()) {
15724 FMA = N0;
15725 E = N1;
15726 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15727 FMA = N1;
15728 E = N0;
15729 }
15730
15731 SDValue TmpFMA = FMA;
15732 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15733 SDValue FMul = TmpFMA->getOperand(2);
15734 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15735 SDValue C = FMul.getOperand(0);
15736 SDValue D = FMul.getOperand(1);
15737 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15738 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15739 // Replacing the inner FMul could cause the outer FMA to be simplified
15740 // away.
15741 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15742 }
15743
15744 TmpFMA = TmpFMA->getOperand(2);
15745 }
15746 }
15747
15748 // Look through FP_EXTEND nodes to do more combining.
15749
15750 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15751 if (matcher.match(N0, ISD::FP_EXTEND)) {
15752 SDValue N00 = N0.getOperand(0);
15753 if (isContractableFMUL(N00) &&
15754 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15755 N00.getValueType())) {
15756 return matcher.getNode(
15757 PreferredFusedOpcode, SL, VT,
15758 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15759 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15760 }
15761 }
15762
15763 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15764 // Note: Commutes FADD operands.
15765 if (matcher.match(N1, ISD::FP_EXTEND)) {
15766 SDValue N10 = N1.getOperand(0);
15767 if (isContractableFMUL(N10) &&
15768 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15769 N10.getValueType())) {
15770 return matcher.getNode(
15771 PreferredFusedOpcode, SL, VT,
15772 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15773 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15774 }
15775 }
15776
15777 // More folding opportunities when target permits.
15778 if (Aggressive) {
15779 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15780 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15781 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15782 SDValue Z) {
15783 return matcher.getNode(
15784 PreferredFusedOpcode, SL, VT, X, Y,
15785 matcher.getNode(PreferredFusedOpcode, SL, VT,
15786 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15787 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15788 };
15789 if (isFusedOp(N0)) {
15790 SDValue N02 = N0.getOperand(2);
15791 if (matcher.match(N02, ISD::FP_EXTEND)) {
15792 SDValue N020 = N02.getOperand(0);
15793 if (isContractableFMUL(N020) &&
15794 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15795 N020.getValueType())) {
15796 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15797 N020.getOperand(0), N020.getOperand(1),
15798 N1);
15799 }
15800 }
15801 }
15802
15803 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15804 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15805 // FIXME: This turns two single-precision and one double-precision
15806 // operation into two double-precision operations, which might not be
15807 // interesting for all targets, especially GPUs.
15808 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15809 SDValue Z) {
15810 return matcher.getNode(
15811 PreferredFusedOpcode, SL, VT,
15812 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15813 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15814 matcher.getNode(PreferredFusedOpcode, SL, VT,
15815 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15816 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15817 };
15818 if (N0.getOpcode() == ISD::FP_EXTEND) {
15819 SDValue N00 = N0.getOperand(0);
15820 if (isFusedOp(N00)) {
15821 SDValue N002 = N00.getOperand(2);
15822 if (isContractableFMUL(N002) &&
15823 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15824 N00.getValueType())) {
15825 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15826 N002.getOperand(0), N002.getOperand(1),
15827 N1);
15828 }
15829 }
15830 }
15831
15832 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
15833 // -> (fma y, z, (fma (fpext u), (fpext v), x))
15834 if (isFusedOp(N1)) {
15835 SDValue N12 = N1.getOperand(2);
15836 if (N12.getOpcode() == ISD::FP_EXTEND) {
15837 SDValue N120 = N12.getOperand(0);
15838 if (isContractableFMUL(N120) &&
15839 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15840 N120.getValueType())) {
15841 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15842 N120.getOperand(0), N120.getOperand(1),
15843 N0);
15844 }
15845 }
15846 }
15847
15848 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
15849 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15850 // FIXME: This turns two single-precision and one double-precision
15851 // operation into two double-precision operations, which might not be
15852 // interesting for all targets, especially GPUs.
15853 if (N1.getOpcode() == ISD::FP_EXTEND) {
15854 SDValue N10 = N1.getOperand(0);
15855 if (isFusedOp(N10)) {
15856 SDValue N102 = N10.getOperand(2);
15857 if (isContractableFMUL(N102) &&
15858 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15859 N10.getValueType())) {
15860 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15861 N102.getOperand(0), N102.getOperand(1),
15862 N0);
15863 }
15864 }
15865 }
15866 }
15867
15868 return SDValue();
15869}
15870
15871/// Try to perform FMA combining on a given FSUB node.
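/// For example, when contraction is allowed:
///   (fsub (fmul x, y), z) -> (fma x, y, (fneg z))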
15872template <class MatchContextClass>
15873SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15874 SDValue N0 = N->getOperand(0);
15875 SDValue N1 = N->getOperand(1);
15876 EVT VT = N->getValueType(0);
15877 SDLoc SL(N);
15878 MatchContextClass matcher(DAG, TLI, N);
15879 const TargetOptions &Options = DAG.getTarget().Options;
15880
15881 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15882
15883 // Floating-point multiply-add with intermediate rounding.
15884 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15885 // FIXME: Add VP_FMAD opcode.
15886 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15887
15888 // Floating-point multiply-add without intermediate rounding.
15889 bool HasFMA =
15890 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15891 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15892
15893 // No valid opcode, do not combine.
15894 if (!HasFMAD && !HasFMA)
15895 return SDValue();
15896
15897 const SDNodeFlags Flags = N->getFlags();
15898 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15899 Options.UnsafeFPMath || HasFMAD);
15900
15901 // If the subtraction is not contractable, do not combine.
15902 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15903 return SDValue();
15904
15905 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15906 return SDValue();
15907
15908 // Always prefer FMAD to FMA for precision.
15909 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15910 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15911 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15912
15913 // Is the node an FMUL and contractable either due to global flags or
15914 // SDNodeFlags.
15915 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15916 if (!matcher.match(N, ISD::FMUL))
15917 return false;
15918 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15919 };
15920
15921 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15922 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15923 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15924 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15925 XY.getOperand(1),
15926 matcher.getNode(ISD::FNEG, SL, VT, Z));
15927 }
15928 return SDValue();
15929 };
15930
15931 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15932 // Note: Commutes FSUB operands.
15933 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15934 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15935 return matcher.getNode(
15936 PreferredFusedOpcode, SL, VT,
15937 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15938 YZ.getOperand(1), X);
15939 }
15940 return SDValue();
15941 };
15942
15943 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15944 // prefer to fold the multiply with fewer uses.
15945 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15946 (N0->use_size() > N1->use_size())) {
15947 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15948 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15949 return V;
15950 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15951 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15952 return V;
15953 } else {
15954 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15955 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15956 return V;
15957 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15958 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15959 return V;
15960 }
15961
15962 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
15963 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15964 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15965 SDValue N00 = N0.getOperand(0).getOperand(0);
15966 SDValue N01 = N0.getOperand(0).getOperand(1);
15967 return matcher.getNode(PreferredFusedOpcode, SL, VT,
15968 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15969 matcher.getNode(ISD::FNEG, SL, VT, N1));
15970 }
15971
15972 // Look through FP_EXTEND nodes to do more combining.
15973
15974 // fold (fsub (fpext (fmul x, y)), z)
15975 // -> (fma (fpext x), (fpext y), (fneg z))
15976 if (matcher.match(N0, ISD::FP_EXTEND)) {
15977 SDValue N00 = N0.getOperand(0);
15978 if (isContractableFMUL(N00) &&
15979 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15980 N00.getValueType())) {
15981 return matcher.getNode(
15982 PreferredFusedOpcode, SL, VT,
15983 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15984 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15985 matcher.getNode(ISD::FNEG, SL, VT, N1));
15986 }
15987 }
15988
15989 // fold (fsub x, (fpext (fmul y, z)))
15990 // -> (fma (fneg (fpext y)), (fpext z), x)
15991 // Note: Commutes FSUB operands.
15992 if (matcher.match(N1, ISD::FP_EXTEND)) {
15993 SDValue N10 = N1.getOperand(0);
15994 if (isContractableFMUL(N10) &&
15995 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15996 N10.getValueType())) {
15997 return matcher.getNode(
15998 PreferredFusedOpcode, SL, VT,
15999 matcher.getNode(
16000 ISD::FNEG, SL, VT,
16001 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16002 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16003 }
16004 }
16005
16006 // fold (fsub (fpext (fneg (fmul x, y))), z)
16007 // -> (fneg (fma (fpext x), (fpext y), z))
16008 // Note: This could be removed with appropriate canonicalization of the
16009 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16010 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16011 // us from implementing the canonicalization in visitFSUB.
16012 if (matcher.match(N0, ISD::FP_EXTEND)) {
16013 SDValue N00 = N0.getOperand(0);
16014 if (matcher.match(N00, ISD::FNEG)) {
16015 SDValue N000 = N00.getOperand(0);
16016 if (isContractableFMUL(N000) &&
16017 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16018 N00.getValueType())) {
16019 return matcher.getNode(
16020 ISD::FNEG, SL, VT,
16021 matcher.getNode(
16022 PreferredFusedOpcode, SL, VT,
16023 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16024 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16025 N1));
16026 }
16027 }
16028 }
16029
16030 // fold (fsub (fneg (fpext (fmul x, y))), z)
16031 // -> (fneg (fma (fpext x), (fpext y), z))
16032 // Note: This could be removed with appropriate canonicalization of the
16033 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16034 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16035 // us from implementing the canonicalization in visitFSUB.
16036 if (matcher.match(N0, ISD::FNEG)) {
16037 SDValue N00 = N0.getOperand(0);
16038 if (matcher.match(N00, ISD::FP_EXTEND)) {
16039 SDValue N000 = N00.getOperand(0);
16040 if (isContractableFMUL(N000) &&
16041 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16042 N000.getValueType())) {
16043 return matcher.getNode(
16044 ISD::FNEG, SL, VT,
16045 matcher.getNode(
16046 PreferredFusedOpcode, SL, VT,
16047 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16048 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16049 N1));
16050 }
16051 }
16052 }
16053
16054 auto isReassociable = [&Options](SDNode *N) {
16055 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16056 };
16057
16058 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16059 &isReassociable](SDValue N) {
16060 return isContractableFMUL(N) && isReassociable(N.getNode());
16061 };
16062
16063 auto isFusedOp = [&](SDValue N) {
16064 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16065 };
16066
16067 // More folding opportunities when target permits.
16068 if (Aggressive && isReassociable(N)) {
16069 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16070 // fold (fsub (fma x, y, (fmul u, v)), z)
16071 // -> (fma x, y (fma u, v, (fneg z)))
16072 if (CanFuse && isFusedOp(N0) &&
16073 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16074 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16075 return matcher.getNode(
16076 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16077 matcher.getNode(PreferredFusedOpcode, SL, VT,
16078 N0.getOperand(2).getOperand(0),
16079 N0.getOperand(2).getOperand(1),
16080 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16081 }
16082
16083 // fold (fsub x, (fma y, z, (fmul u, v)))
16084 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16085 if (CanFuse && isFusedOp(N1) &&
16086 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16087 N1->hasOneUse() && NoSignedZero) {
16088 SDValue N20 = N1.getOperand(2).getOperand(0);
16089 SDValue N21 = N1.getOperand(2).getOperand(1);
16090 return matcher.getNode(
16091 PreferredFusedOpcode, SL, VT,
16092 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16093 N1.getOperand(1),
16094 matcher.getNode(PreferredFusedOpcode, SL, VT,
16095 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16096 }
16097
16098 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16099 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16100 if (isFusedOp(N0) && N0->hasOneUse()) {
16101 SDValue N02 = N0.getOperand(2);
16102 if (matcher.match(N02, ISD::FP_EXTEND)) {
16103 SDValue N020 = N02.getOperand(0);
16104 if (isContractableAndReassociableFMUL(N020) &&
16105 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16106 N020.getValueType())) {
16107 return matcher.getNode(
16108 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16109 matcher.getNode(
16110 PreferredFusedOpcode, SL, VT,
16111 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16112 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16113 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16114 }
16115 }
16116 }
16117
16118 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16119 // -> (fma (fpext x), (fpext y),
16120 // (fma (fpext u), (fpext v), (fneg z)))
16121 // FIXME: This turns two single-precision and one double-precision
16122 // operation into two double-precision operations, which might not be
16123 // interesting for all targets, especially GPUs.
16124 if (matcher.match(N0, ISD::FP_EXTEND)) {
16125 SDValue N00 = N0.getOperand(0);
16126 if (isFusedOp(N00)) {
16127 SDValue N002 = N00.getOperand(2);
16128 if (isContractableAndReassociableFMUL(N002) &&
16129 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16130 N00.getValueType())) {
16131 return matcher.getNode(
16132 PreferredFusedOpcode, SL, VT,
16133 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16134 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16135 matcher.getNode(
16136 PreferredFusedOpcode, SL, VT,
16137 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16138 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16139 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16140 }
16141 }
16142 }
16143
16144 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16145 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16146 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16147 N1->hasOneUse()) {
16148 SDValue N120 = N1.getOperand(2).getOperand(0);
16149 if (isContractableAndReassociableFMUL(N120) &&
16150 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16151 N120.getValueType())) {
16152 SDValue N1200 = N120.getOperand(0);
16153 SDValue N1201 = N120.getOperand(1);
16154 return matcher.getNode(
16155 PreferredFusedOpcode, SL, VT,
16156 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16157 N1.getOperand(1),
16158 matcher.getNode(
16159 PreferredFusedOpcode, SL, VT,
16160 matcher.getNode(ISD::FNEG, SL, VT,
16161 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16162 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16163 }
16164 }
16165
16166 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16167 // -> (fma (fneg (fpext y)), (fpext z),
16168 // (fma (fneg (fpext u)), (fpext v), x))
16169 // FIXME: This turns two single-precision and one double-precision
16170 // operation into two double-precision operations, which might not be
16171 // interesting for all targets, especially GPUs.
16172 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16173 SDValue CvtSrc = N1.getOperand(0);
16174 SDValue N100 = CvtSrc.getOperand(0);
16175 SDValue N101 = CvtSrc.getOperand(1);
16176 SDValue N102 = CvtSrc.getOperand(2);
16177 if (isContractableAndReassociableFMUL(N102) &&
16178 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16179 CvtSrc.getValueType())) {
16180 SDValue N1020 = N102.getOperand(0);
16181 SDValue N1021 = N102.getOperand(1);
16182 return matcher.getNode(
16183 PreferredFusedOpcode, SL, VT,
16184 matcher.getNode(ISD::FNEG, SL, VT,
16185 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16186 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16187 matcher.getNode(
16188 PreferredFusedOpcode, SL, VT,
16189 matcher.getNode(ISD::FNEG, SL, VT,
16190 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16191 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16192 }
16193 }
16194 }
16195
16196 return SDValue();
16197}
16198
16199/// Try to perform FMA combining on a given FMUL node based on the distributive
16200/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16201/// subtraction instead of addition).
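/// For example:
///   (fmul (fadd x, 1.0), y) -> (fma x, y, y)
///   (fmul (fsub 1.0, x), y) -> (fma (fneg x), y, y)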
16202SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16203 SDValue N0 = N->getOperand(0);
16204 SDValue N1 = N->getOperand(1);
16205 EVT VT = N->getValueType(0);
16206 SDLoc SL(N);
16207
16208 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16209
16210 const TargetOptions &Options = DAG.getTarget().Options;
16211
16212 // The transforms below are incorrect when x == 0 and y == inf, because the
16213 // intermediate multiplication produces a nan.
16214 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16215 if (!hasNoInfs(Options, FAdd))
16216 return SDValue();
16217
16218 // Floating-point multiply-add without intermediate rounding.
16219 bool HasFMA =
16220 isContractableFMUL(Options, SDValue(N, 0)) &&
16221 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16222 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16223
16224 // Floating-point multiply-add with intermediate rounding. This can result
16225 // in a less precise result due to the changed rounding order.
16226 bool HasFMAD = Options.UnsafeFPMath &&
16227 (LegalOperations && TLI.isFMADLegal(DAG, N));
16228
16229 // No valid opcode, do not combine.
16230 if (!HasFMAD && !HasFMA)
16231 return SDValue();
16232
16233 // Always prefer FMAD to FMA for precision.
16234 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16235 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16236
16237 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16238 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16239 auto FuseFADD = [&](SDValue X, SDValue Y) {
16240 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16241 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16242 if (C->isExactlyValue(+1.0))
16243 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16244 Y);
16245 if (C->isExactlyValue(-1.0))
16246 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16247 DAG.getNode(ISD::FNEG, SL, VT, Y));
16248 }
16249 }
16250 return SDValue();
16251 };
16252
16253 if (SDValue FMA = FuseFADD(N0, N1))
16254 return FMA;
16255 if (SDValue FMA = FuseFADD(N1, N0))
16256 return FMA;
16257
16258 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16259 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16260 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16261 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16262 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16263 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16264 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16265 if (C0->isExactlyValue(+1.0))
16266 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16267 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16268 Y);
16269 if (C0->isExactlyValue(-1.0))
16270 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16271 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16272 DAG.getNode(ISD::FNEG, SL, VT, Y));
16273 }
16274 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16275 if (C1->isExactlyValue(+1.0))
16276 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16277 DAG.getNode(ISD::FNEG, SL, VT, Y));
16278 if (C1->isExactlyValue(-1.0))
16279 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16280 Y);
16281 }
16282 }
16283 return SDValue();
16284 };
16285
16286 if (SDValue FMA = FuseFSUB(N0, N1))
16287 return FMA;
16288 if (SDValue FMA = FuseFSUB(N1, N0))
16289 return FMA;
16290
16291 return SDValue();
16292}
16293
16294SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16295 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16296
16297 // FADD -> FMA combines:
16298 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16299 if (Fused.getOpcode() != ISD::DELETED_NODE)
16300 AddToWorklist(Fused.getNode());
16301 return Fused;
16302 }
16303 return SDValue();
16304}
16305
16306SDValue DAGCombiner::visitFADD(SDNode *N) {
16307 SDValue N0 = N->getOperand(0);
16308 SDValue N1 = N->getOperand(1);
16309 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16310 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16311 EVT VT = N->getValueType(0);
16312 SDLoc DL(N);
16313 const TargetOptions &Options = DAG.getTarget().Options;
16314 SDNodeFlags Flags = N->getFlags();
16315 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16316
16317 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16318 return R;
16319
16320 // fold (fadd c1, c2) -> c1 + c2
16321 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16322 return C;
16323
16324 // canonicalize constant to RHS
16325 if (N0CFP && !N1CFP)
16326 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16327
16328 // fold vector ops
16329 if (VT.isVector())
16330 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16331 return FoldedVOp;
16332
16333 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16334 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16335 if (N1C && N1C->isZero())
16336 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16337 return N0;
16338
16339 if (SDValue NewSel = foldBinOpIntoSelect(N))
16340 return NewSel;
16341
16342 // fold (fadd A, (fneg B)) -> (fsub A, B)
16343 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16344 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16345 N1, DAG, LegalOperations, ForCodeSize))
16346 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16347
16348 // fold (fadd (fneg A), B) -> (fsub B, A)
16349 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16350 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16351 N0, DAG, LegalOperations, ForCodeSize))
16352 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16353
16354 auto isFMulNegTwo = [](SDValue FMul) {
16355 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16356 return false;
16357 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16358 return C && C->isExactlyValue(-2.0);
16359 };
16360
16361 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16362 if (isFMulNegTwo(N0)) {
16363 SDValue B = N0.getOperand(0);
16364 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16365 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16366 }
16367 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16368 if (isFMulNegTwo(N1)) {
16369 SDValue B = N1.getOperand(0);
16370 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16371 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16372 }
16373
16374 // No FP constant should be created after legalization as the Instruction
16375 // Selection pass has a hard time dealing with FP constants.
16376 bool AllowNewConst = (Level < AfterLegalizeDAG);
16377
16378 // If nnan is enabled, fold lots of things.
16379 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16380 // If allowed, fold (fadd (fneg x), x) -> 0.0
16381 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16382 return DAG.getConstantFP(0.0, DL, VT);
16383
16384 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16385 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16386 return DAG.getConstantFP(0.0, DL, VT);
16387 }
16388
16389 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16390 // TODO: break out portions of the transformations below for which Unsafe is
16391 // considered and which do not require both nsz and reassoc
16392 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16393 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16394 AllowNewConst) {
16395 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16396 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16397 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16398 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16399 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16400 }
16401
16402 // We can fold chains of FADD's of the same value into multiplications.
16403 // This transform is not safe in general because we are reducing the number
16404 // of rounding steps.
16405 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16406 if (N0.getOpcode() == ISD::FMUL) {
16407 SDNode *CFP00 =
16408 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16409 SDNode *CFP01 =
16410 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16411
16412 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16413 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16414 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16415 DAG.getConstantFP(1.0, DL, VT));
16416 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16417 }
16418
16419 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16420 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16421 N1.getOperand(0) == N1.getOperand(1) &&
16422 N0.getOperand(0) == N1.getOperand(0)) {
16423 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16424 DAG.getConstantFP(2.0, DL, VT));
16425 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16426 }
16427 }
16428
16429 if (N1.getOpcode() == ISD::FMUL) {
16430 SDNode *CFP10 =
16431 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16432 SDNode *CFP11 =
16433 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16434
16435 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16436 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16437 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16438 DAG.getConstantFP(1.0, DL, VT));
16439 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16440 }
16441
16442 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16443 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16444 N0.getOperand(0) == N0.getOperand(1) &&
16445 N1.getOperand(0) == N0.getOperand(0)) {
16446 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16447 DAG.getConstantFP(2.0, DL, VT));
16448 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16449 }
16450 }
16451
16452 if (N0.getOpcode() == ISD::FADD) {
16453 SDNode *CFP00 =
16454 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16455 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16456 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16457 (N0.getOperand(0) == N1)) {
16458 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16459 DAG.getConstantFP(3.0, DL, VT));
16460 }
16461 }
16462
16463 if (N1.getOpcode() == ISD::FADD) {
16464 SDNode *CFP10 =
16465 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16466 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16467 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16468 N1.getOperand(0) == N0) {
16469 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16470 DAG.getConstantFP(3.0, DL, VT));
16471 }
16472 }
16473
16474 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16475 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16476 N0.getOperand(0) == N0.getOperand(1) &&
16477 N1.getOperand(0) == N1.getOperand(1) &&
16478 N0.getOperand(0) == N1.getOperand(0)) {
16479 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16480 DAG.getConstantFP(4.0, DL, VT));
16481 }
16482 }
16483
16484 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16485 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16486 VT, N0, N1, Flags))
16487 return SD;
16488 } // enable-unsafe-fp-math
16489
16490 // FADD -> FMA combines:
16491 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16492 if (Fused.getOpcode() != ISD::DELETED_NODE)
16493 AddToWorklist(Fused.getNode());
16494 return Fused;
16495 }
16496 return SDValue();
16497}
16498
16499SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16500 SDValue Chain = N->getOperand(0);
16501 SDValue N0 = N->getOperand(1);
16502 SDValue N1 = N->getOperand(2);
16503 EVT VT = N->getValueType(0);
16504 EVT ChainVT = N->getValueType(1);
16505 SDLoc DL(N);
16506 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16507
16508 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16509 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16510 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16511 N1, DAG, LegalOperations, ForCodeSize)) {
16512 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16513 {Chain, N0, NegN1});
16514 }
16515
16516 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16517 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16518 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16519 N0, DAG, LegalOperations, ForCodeSize)) {
16520 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16521 {Chain, N1, NegN0});
16522 }
16523 return SDValue();
16524}
16525
16526SDValue DAGCombiner::visitFSUB(SDNode *N) {
16527 SDValue N0 = N->getOperand(0);
16528 SDValue N1 = N->getOperand(1);
16529 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16530 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16531 EVT VT = N->getValueType(0);
16532 SDLoc DL(N);
16533 const TargetOptions &Options = DAG.getTarget().Options;
16534 const SDNodeFlags Flags = N->getFlags();
16535 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16536
16537 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16538 return R;
16539
16540 // fold (fsub c1, c2) -> c1-c2
16541 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16542 return C;
16543
16544 // fold vector ops
16545 if (VT.isVector())
16546 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16547 return FoldedVOp;
16548
16549 if (SDValue NewSel = foldBinOpIntoSelect(N))
16550 return NewSel;
16551
16552 // (fsub A, 0) -> A
16553 if (N1CFP && N1CFP->isZero()) {
16554 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16555 Flags.hasNoSignedZeros()) {
16556 return N0;
16557 }
16558 }
16559
16560 if (N0 == N1) {
16561 // (fsub x, x) -> 0.0
16562 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16563 return DAG.getConstantFP(0.0f, DL, VT);
16564 }
16565
16566 // (fsub -0.0, N1) -> -N1
16567 if (N0CFP && N0CFP->isZero()) {
16568 if (N0CFP->isNegative() ||
16569 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16570 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16571 // flushed to zero, unless all users treat denorms as zero (DAZ).
16572 // FIXME: This transform will change the sign of a NaN and the behavior
16573 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16574 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16575 if (DenormMode == DenormalMode::getIEEE()) {
16576 if (SDValue NegN1 =
16577 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16578 return NegN1;
16579 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16580 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16581 }
16582 }
16583 }
16584
16585 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16586 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16587 N1.getOpcode() == ISD::FADD) {
16588 // X - (X + Y) -> -Y
16589 if (N0 == N1->getOperand(0))
16590 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16591 // X - (Y + X) -> -Y
16592 if (N0 == N1->getOperand(1))
16593 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16594 }
16595
16596 // fold (fsub A, (fneg B)) -> (fadd A, B)
16597 if (SDValue NegN1 =
16598 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16599 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16600
16601 // FSUB -> FMA combines:
16602 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16603 AddToWorklist(Fused.getNode());
16604 return Fused;
16605 }
16606
16607 return SDValue();
16608}
16609
16610// Transform IEEE Floats:
16611// (fmul C, (uitofp Pow2))
16612// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16613// (fdiv C, (uitofp Pow2))
16614// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16615//
16616// The rationale is that an fmul/fdiv by a power of 2 just changes the exponent, so
16617// there is no need for more than an add/sub.
16618//
16619// This is valid under the following circumstances:
16620// 1) We are dealing with IEEE floats
16621// 2) C is normal
16622// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16623// TODO: Much of this could also be used for generating `ldexp` on targets that
16624// prefer it.
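// Illustrative sketch (annotation, not part of the original source): for f32
// the exponent field sits above the 23-bit mantissa. Multiplying C = 2.0f
// (bits 0x40000000) by Pow2 = 8 adds Log2(8) << 23 = 0x01800000 to the
// integer representation, yielding 0x41800000 == 16.0f, i.e. the fmul result
// obtained with a single integer add.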
16625SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16626 EVT VT = N->getValueType(0);
16627 SDValue ConstOp, Pow2Op;
16628
16629 std::optional<int> Mantissa;
16630 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16631 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16632 return false;
16633
16634 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16635 Pow2Op = N->getOperand(1 - ConstOpIdx);
16636 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16637 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16638 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16639 return false;
16640
16641 Pow2Op = Pow2Op.getOperand(0);
16642
16643 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16644 // TODO: We could use knownbits to make this bound more precise.
16645 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16646
16647 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16648 if (CFP == nullptr)
16649 return false;
16650
16651 const APFloat &APF = CFP->getValueAPF();
16652
16653 // Make sure we have a normal/IEEE constant.
16654 if (!APF.isNormal() || !APF.isIEEE())
16655 return false;
16656
16657 // Make sure the float's exponent is within the bounds for which this
16658 // transform produces a bitwise-equal value.
16659 int CurExp = ilogb(APF);
16660 // FMul by pow2 will only increase exponent.
16661 int MinExp =
16662 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16663 // FDiv by pow2 will only decrease exponent.
16664 int MaxExp =
16665 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16666 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16667 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16668 return false;
16669
16670 // Finally make sure we actually know the mantissa for the float type.
16671 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16672 if (!Mantissa)
16673 Mantissa = ThisMantissa;
16674
16675 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16676 };
16677
16678 // TODO: We may be able to include undefs.
16679 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16680 };
16681
16682 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16683 return SDValue();
16684
16685 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16686 return SDValue();
16687
16688 // Get log2 after all other checks have taken place. This is because
16689 // BuildLogBase2 may create a new node.
16690 SDLoc DL(N);
16691 // Get Log2 type with same bitwidth as the float type (VT).
16692 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16693 if (VT.isVector())
16694 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16695 VT.getVectorElementCount());
16696
16697 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16698 /*InexpensiveOnly*/ true, NewIntVT);
16699 if (!Log2)
16700 return SDValue();
16701
16702 // Perform actual transform.
16703 SDValue MantissaShiftCnt =
16704 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16705 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16706 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16707 // cast. We could implement that here by handling the casts.
16708 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16709 SDValue ResAsInt =
16710 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16711 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16712 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16713 return ResAsFP;
16714}
16715
16716SDValue DAGCombiner::visitFMUL(SDNode *N) {
16717 SDValue N0 = N->getOperand(0);
16718 SDValue N1 = N->getOperand(1);
16719 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16720 EVT VT = N->getValueType(0);
16721 SDLoc DL(N);
16722 const TargetOptions &Options = DAG.getTarget().Options;
16723 const SDNodeFlags Flags = N->getFlags();
16724 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16725
16726 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16727 return R;
16728
16729 // fold (fmul c1, c2) -> c1*c2
16730 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16731 return C;
16732
16733 // canonicalize constant to RHS
16734 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16735 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16736 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16737
16738 // fold vector ops
16739 if (VT.isVector())
16740 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16741 return FoldedVOp;
16742
16743 if (SDValue NewSel = foldBinOpIntoSelect(N))
16744 return NewSel;
16745
16746 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16747 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16748 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16749 N0.getOpcode() == ISD::FMUL) {
16750 SDValue N00 = N0.getOperand(0);
16751 SDValue N01 = N0.getOperand(1);
16752 // Avoid an infinite loop by making sure that N00 is not a constant
16753 // (the inner multiply has not been constant folded yet).
16754 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16755 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16756 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16757 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16758 }
16759 }
16760
16761 // Match a special-case: we convert X * 2.0 into fadd.
16762 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16763 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16764 N0.getOperand(0) == N0.getOperand(1)) {
16765 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16766 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16767 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16768 }
16769
16770 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16771 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16772 VT, N0, N1, Flags))
16773 return SD;
16774 }
16775
16776 // fold (fmul X, 2.0) -> (fadd X, X)
16777 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16778 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16779
16780 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16781 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16782 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16783 return DAG.getNode(ISD::FSUB, DL, VT,
16784 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16785 }
16786 }
16787
16788 // -N0 * -N1 --> N0 * N1
16789 TargetLowering::NegatibleCost CostN0 =
16790 TargetLowering::NegatibleCost::Expensive;
16791 TargetLowering::NegatibleCost CostN1 =
16792 TargetLowering::NegatibleCost::Expensive;
16793 SDValue NegN0 =
16794 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16795 if (NegN0) {
16796 HandleSDNode NegN0Handle(NegN0);
16797 SDValue NegN1 =
16798 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16799 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16800 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16801 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16802 }
16803
16804 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16805 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16806 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16807 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16808 TLI.isOperationLegal(ISD::FABS, VT)) {
16809 SDValue Select = N0, X = N1;
16810 if (Select.getOpcode() != ISD::SELECT)
16811 std::swap(Select, X);
16812
16813 SDValue Cond = Select.getOperand(0);
16814 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16815 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16816
16817 if (TrueOpnd && FalseOpnd &&
16818 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16819 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16820 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16821 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16822 switch (CC) {
16823 default: break;
16824 case ISD::SETOLT:
16825 case ISD::SETULT:
16826 case ISD::SETOLE:
16827 case ISD::SETULE:
16828 case ISD::SETLT:
16829 case ISD::SETLE:
16830 std::swap(TrueOpnd, FalseOpnd);
16831 [[fallthrough]];
16832 case ISD::SETOGT:
16833 case ISD::SETUGT:
16834 case ISD::SETOGE:
16835 case ISD::SETUGE:
16836 case ISD::SETGT:
16837 case ISD::SETGE:
16838 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16839 TLI.isOperationLegal(ISD::FNEG, VT))
16840 return DAG.getNode(ISD::FNEG, DL, VT,
16841 DAG.getNode(ISD::FABS, DL, VT, X));
16842 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16843 return DAG.getNode(ISD::FABS, DL, VT, X);
16844
16845 break;
16846 }
16847 }
16848 }
16849
16850 // FMUL -> FMA combines:
16851 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16852 AddToWorklist(Fused.getNode());
16853 return Fused;
16854 }
16855
16856 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
16857 // able to run.
16858 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16859 return R;
16860
16861 return SDValue();
16862}
16863
16864template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16865 SDValue N0 = N->getOperand(0);
16866 SDValue N1 = N->getOperand(1);
16867 SDValue N2 = N->getOperand(2);
16868 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16869 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16870 EVT VT = N->getValueType(0);
16871 SDLoc DL(N);
16872 const TargetOptions &Options = DAG.getTarget().Options;
16873 // FMA nodes have flags that propagate to the created nodes.
16874 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16875 MatchContextClass matcher(DAG, TLI, N);
16876
16877 bool CanReassociate =
16878 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16879
16880 // Constant fold FMA.
16881 if (isa<ConstantFPSDNode>(N0) &&
16882 isa<ConstantFPSDNode>(N1) &&
16883 isa<ConstantFPSDNode>(N2)) {
16884 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16885 }
16886
16887 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16888 TargetLowering::NegatibleCost CostN0 =
16889 TargetLowering::NegatibleCost::Expensive;
16890 TargetLowering::NegatibleCost CostN1 =
16891 TargetLowering::NegatibleCost::Expensive;
16892 SDValue NegN0 =
16893 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16894 if (NegN0) {
16895 HandleSDNode NegN0Handle(NegN0);
16896 SDValue NegN1 =
16897 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16898 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16899 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16900 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16901 }
16902
16903 // FIXME: use fast math flags instead of Options.UnsafeFPMath
16904 if (Options.UnsafeFPMath) {
16905 if (N0CFP && N0CFP->isZero())
16906 return N2;
16907 if (N1CFP && N1CFP->isZero())
16908 return N2;
16909 }
16910
16911 // FIXME: Support splat of constant.
16912 if (N0CFP && N0CFP->isExactlyValue(1.0))
16913 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16914 if (N1CFP && N1CFP->isExactlyValue(1.0))
16915 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16916
16917 // Canonicalize (fma c, x, y) -> (fma x, c, y)
16918 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16919 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16920 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16921
16922 if (CanReassociate) {
16923 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16924 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16925 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16926 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16927 return matcher.getNode(
16928 ISD::FMUL, DL, VT, N0,
16929 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16930 }
16931
16932 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16933 if (matcher.match(N0, ISD::FMUL) &&
16934 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16935 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16936 return matcher.getNode(
16937 ISD::FMA, DL, VT, N0.getOperand(0),
16938 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16939 }
16940 }
16941
16942 // (fma x, -1, y) -> (fadd (fneg x), y)
16943 // FIXME: Support splat of constant.
16944 if (N1CFP) {
16945 if (N1CFP->isExactlyValue(1.0))
16946 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16947
16948 if (N1CFP->isExactlyValue(-1.0) &&
16949 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16950 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16951 AddToWorklist(RHSNeg.getNode());
16952 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16953 }
16954
16955 // fma (fneg x), K, y -> fma x, -K, y
16956 if (matcher.match(N0, ISD::FNEG) &&
16957 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16958 (N1.hasOneUse() &&
16959 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16960 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16961 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16962 }
16963 }
16964
16965 // FIXME: Support splat of constant.
16966 if (CanReassociate) {
16967 // (fma x, c, x) -> (fmul x, (c+1))
16968 if (N1CFP && N0 == N2) {
16969 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16970 matcher.getNode(ISD::FADD, DL, VT, N1,
16971 DAG.getConstantFP(1.0, DL, VT)));
16972 }
16973
16974 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16975 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16976 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16977 matcher.getNode(ISD::FADD, DL, VT, N1,
16978 DAG.getConstantFP(-1.0, DL, VT)));
16979 }
16980 }
16981
16982 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16983 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16984 if (!TLI.isFNegFree(VT))
16985 if (SDValue Neg = TLI.getCheaperNegatedExpression(
16986 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
16987 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
16988 return SDValue();
16989}
16990
16991SDValue DAGCombiner::visitFMAD(SDNode *N) {
16992 SDValue N0 = N->getOperand(0);
16993 SDValue N1 = N->getOperand(1);
16994 SDValue N2 = N->getOperand(2);
16995 EVT VT = N->getValueType(0);
16996 SDLoc DL(N);
16997
16998 // Constant fold FMAD.
16999 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17000 isa<ConstantFPSDNode>(N2))
17001 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17002
17003 return SDValue();
17004}
17005
17006// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17007// reciprocal.
17008// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17009// Notice that this is not always beneficial. One reason is different targets
17010// may have different costs for FDIV and FMUL, so sometimes the cost of two
17011// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17012// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17013SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17014 // TODO: Limit this transform based on optsize/minsize - it always creates at
17015 // least 1 extra instruction. But the perf win may be substantial enough
17016 // that only minsize should restrict this.
17017 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17018 const SDNodeFlags Flags = N->getFlags();
17019 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17020 return SDValue();
17021
17022 // Skip if current node is a reciprocal/fneg-reciprocal.
17023 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17024 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17025 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17026 return SDValue();
17027
17028 // Exit early if the target does not want this transform or if there can't
17029 // possibly be enough uses of the divisor to make the transform worthwhile.
17030 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17031
17032 // For splat vectors, scale the number of uses by the splat factor. If we can
17033 // convert the division into a scalar op, that will likely be much faster.
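// Illustrative example (annotation, not from the source): with MinUses == 2,
// one FDIV of a <4 x float> by a splat divisor counts as 1 * 4 = 4 scalar
// uses, so a single vector division can already satisfy the threshold.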
17034 unsigned NumElts = 1;
17035 EVT VT = N->getValueType(0);
17036 if (VT.isVector() && DAG.isSplatValue(N1))
17037 NumElts = VT.getVectorMinNumElements();
17038
17039 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17040 return SDValue();
17041
17042 // Find all FDIV users of the same divisor.
17043 // Use a set because duplicates may be present in the user list.
17044 SetVector<SDNode *> Users;
17045 for (auto *U : N1->uses()) {
17046 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17047 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17048 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17049 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17050 U->getFlags().hasAllowReassociation() &&
17051 U->getFlags().hasNoSignedZeros())
17052 continue;
17053
17054 // This division is eligible for optimization only if global unsafe math
17055 // is enabled or if this division allows reciprocal formation.
17056 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17057 Users.insert(U);
17058 }
17059 }
17060
17061 // Now that we have the actual number of divisor uses, make sure it meets
17062 // the minimum threshold specified by the target.
17063 if ((Users.size() * NumElts) < MinUses)
17064 return SDValue();
17065
17066 SDLoc DL(N);
17067 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17068 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17069
17070 // Dividend / Divisor -> Dividend * Reciprocal
17071 for (auto *U : Users) {
17072 SDValue Dividend = U->getOperand(0);
17073 if (Dividend != FPOne) {
17074 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17075 Reciprocal, Flags);
17076 CombineTo(U, NewNode);
17077 } else if (U != Reciprocal.getNode()) {
17078 // In the absence of fast-math-flags, this user node is always the
17079 // same node as Reciprocal, but with FMF they may be different nodes.
17080 CombineTo(U, Reciprocal);
17081 }
17082 }
17083 return SDValue(N, 0); // N was replaced.
17084}
17085
17086SDValue DAGCombiner::visitFDIV(SDNode *N) {
17087 SDValue N0 = N->getOperand(0);
17088 SDValue N1 = N->getOperand(1);
17089 EVT VT = N->getValueType(0);
17090 SDLoc DL(N);
17091 const TargetOptions &Options = DAG.getTarget().Options;
17092 SDNodeFlags Flags = N->getFlags();
17093 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17094
17095 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17096 return R;
17097
17098 // fold (fdiv c1, c2) -> c1/c2
17099 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17100 return C;
17101
17102 // fold vector ops
17103 if (VT.isVector())
17104 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17105 return FoldedVOp;
17106
17107 if (SDValue NewSel = foldBinOpIntoSelect(N))
17108 return NewSel;
17109
17110 if (SDValue V = combineRepeatedFPDivisors(N))
17111 return V;
17111 return V;
17112
17113 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17114 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
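// Illustrative example (annotation, not from the source): x / 8.0f becomes
// x * 0.125f and the reciprocal is exact (opOK); x / 3.0f becomes
// x * 0.33333334f, which is inexact (opInexact) but still accepted below.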
17115 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17116 // Compute the reciprocal 1.0 / c2.
17117 const APFloat &N1APF = N1CFP->getValueAPF();
17118 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17119 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17120 // Only do the transform if the reciprocal is a legal fp immediate that
17121 // isn't too nasty (eg NaN, denormal, ...).
17122 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17123 (!LegalOperations ||
17124 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17125 // backend)... we should handle this gracefully after Legalize.
17126 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17127 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17128 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17129 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17130 DAG.getConstantFP(Recip, DL, VT));
17131 }
17132
17133 // If this FDIV is part of a reciprocal square root, it may be folded
17134 // into a target-specific square root estimate instruction.
17135 if (N1.getOpcode() == ISD::FSQRT) {
17136 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17137 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17138 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17139 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17140 if (SDValue RV =
17141 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17142 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17143 AddToWorklist(RV.getNode());
17144 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17145 }
17146 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17147 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17148 if (SDValue RV =
17149 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17150 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17151 AddToWorklist(RV.getNode());
17152 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17153 }
17154 } else if (N1.getOpcode() == ISD::FMUL) {
17155 // Look through an FMUL. Even though this won't remove the FDIV directly,
17156 // it's still worthwhile to get rid of the FSQRT if possible.
17157 SDValue Sqrt, Y;
17158 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17159 Sqrt = N1.getOperand(0);
17160 Y = N1.getOperand(1);
17161 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17162 Sqrt = N1.getOperand(1);
17163 Y = N1.getOperand(0);
17164 }
17165 if (Sqrt.getNode()) {
17166 // If the other multiply operand is known positive, pull it into the
17167 // sqrt. That will eliminate the division if we convert to an estimate.
17168 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17169 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17170 SDValue A;
17171 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17172 A = Y.getOperand(0);
17173 else if (Y == Sqrt.getOperand(0))
17174 A = Y;
17175 if (A) {
17176 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17177 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17178 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17179 SDValue AAZ =
17180 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17181 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17182 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17183
17184 // Estimate creation failed. Clean up speculatively created nodes.
17185 recursivelyDeleteUnusedNodes(AAZ.getNode());
17186 }
17187 }
17188
17189 // We found a FSQRT, so try to make this fold:
17190 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17191 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17192 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17193 AddToWorklist(Div.getNode());
17194 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17195 }
17196 }
17197 }
17198
17199 // Fold into a reciprocal estimate and multiply instead of a real divide.
17200 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17201 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17202 return RV;
17203 }
17204
17205 // Fold X/Sqrt(X) -> Sqrt(X)
17206 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17207 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17208 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17209 return N1;
17210
17211 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17212 TargetLowering::NegatibleCost CostN0 =
17213 TargetLowering::NegatibleCost::Expensive;
17214 TargetLowering::NegatibleCost CostN1 =
17215 TargetLowering::NegatibleCost::Expensive;
17216 SDValue NegN0 =
17217 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17218 if (NegN0) {
17219 HandleSDNode NegN0Handle(NegN0);
17220 SDValue NegN1 =
17221 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17222 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17223 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17224 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17225 }
17226
17227 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17228 return R;
17229
17230 return SDValue();
17231}
17232
17233SDValue DAGCombiner::visitFREM(SDNode *N) {
17234 SDValue N0 = N->getOperand(0);
17235 SDValue N1 = N->getOperand(1);
17236 EVT VT = N->getValueType(0);
17237 SDNodeFlags Flags = N->getFlags();
17238 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17239
17240 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17241 return R;
17242
17243 // fold (frem c1, c2) -> fmod(c1,c2)
17244 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17245 return C;
17246
17247 if (SDValue NewSel = foldBinOpIntoSelect(N))
17248 return NewSel;
17249
17250 return SDValue();
17251}
17252
17253SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17254 SDNodeFlags Flags = N->getFlags();
17255 const TargetOptions &Options = DAG.getTarget().Options;
17256
17257 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17258 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17259 if (!Flags.hasApproximateFuncs() ||
17260 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17261 return SDValue();
17262
17263 SDValue N0 = N->getOperand(0);
17264 if (TLI.isFsqrtCheap(N0, DAG))
17265 return SDValue();
17266
17267 // FSQRT nodes have flags that propagate to the created nodes.
17268 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17269 // transform the fdiv, we may produce a sub-optimal estimate sequence
17270 // because the reciprocal calculation may not have to filter out a
17271 // 0.0 input.
17272 return buildSqrtEstimate(N0, Flags);
17273}
17274
17275/// copysign(x, fp_extend(y)) -> copysign(x, y)
17276/// copysign(x, fp_round(y)) -> copysign(x, y)
17277/// Operands to the functions are the type of X and Y respectively.
17278static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17279 // Always fold no-op FP casts.
17280 if (XTy == YTy)
17281 return true;
17282
17283 // Do not optimize out type conversion of f128 type yet.
17284 // For some targets like x86_64, configuration is changed to keep one f128
17285 // value in one SSE register, but instruction selection cannot handle
17286 // FCOPYSIGN on SSE registers yet.
17287 if (YTy == MVT::f128)
17288 return false;
17289
17290 return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17291}
17292
17293static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17294 SDValue N1 = N->getOperand(1);
17295 if (N1.getOpcode() != ISD::FP_EXTEND &&
17296 N1.getOpcode() != ISD::FP_ROUND)
17297 return false;
17298 EVT N1VT = N1->getValueType(0);
17299 EVT N1Op0VT = N1->getOperand(0).getValueType();
17300 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17301}
17302
17303SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17304 SDValue N0 = N->getOperand(0);
17305 SDValue N1 = N->getOperand(1);
17306 EVT VT = N->getValueType(0);
17307
17308 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17309 if (SDValue C =
17310 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17311 return C;
17312
17313 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17314 const APFloat &V = N1C->getValueAPF();
17315 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17316 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17317 if (!V.isNegative()) {
17318 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17319 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17320 } else {
17321 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17322 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17323 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17324 }
17325 }
17326
17327 // copysign(fabs(x), y) -> copysign(x, y)
17328 // copysign(fneg(x), y) -> copysign(x, y)
17329 // copysign(copysign(x,z), y) -> copysign(x, y)
17330 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17331 N0.getOpcode() == ISD::FCOPYSIGN)
17332 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17333
17334 // copysign(x, abs(y)) -> abs(x)
17335 if (N1.getOpcode() == ISD::FABS)
17336 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17337
17338 // copysign(x, copysign(y,z)) -> copysign(x, z)
17339 if (N1.getOpcode() == ISD::FCOPYSIGN)
17340 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17341
17342 // copysign(x, fp_extend(y)) -> copysign(x, y)
17343 // copysign(x, fp_round(y)) -> copysign(x, y)
17344 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17345 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17346
17347 return SDValue();
17348}
17349
17350SDValue DAGCombiner::visitFPOW(SDNode *N) {
17351 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17352 if (!ExponentC)
17353 return SDValue();
17354 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17355
17356 // Try to convert x ** (1/3) into cube root.
17357 // TODO: Handle the various flavors of long double.
17358 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17359 // Some range near 1/3 should be fine.
17360 EVT VT = N->getValueType(0);
17361 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17362 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17363 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17364 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17365 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
17366 // For regular numbers, rounding may cause the results to differ.
17367 // Therefore, we require { nsz ninf nnan afn } for this transform.
17368 // TODO: We could select out the special cases if we don't have nsz/ninf.
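// Illustrative example (annotation, not from the source): pow(-8.0, 1.0/3.0)
// is NaN under IEEE pow semantics, whereas a cbrt(-8.0) call returns -2.0;
// the nnan/ninf/nsz/afn flags required here make that divergence acceptable.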
17369 SDNodeFlags Flags = N->getFlags();
17370 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17371 !Flags.hasApproximateFuncs())
17372 return SDValue();
17373
17374 // Do not create a cbrt() libcall if the target does not have it, and do not
17375 // turn a pow that has lowering support into a cbrt() libcall.
17376 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17377 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17378 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17379 return SDValue();
17380
17381 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17382 }
17383
17384 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17385 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17386 // TODO: This could be extended (using a target hook) to handle smaller
17387 // power-of-2 fractional exponents.
17388 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17389 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17390 if (ExponentIs025 || ExponentIs075) {
17391 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17392 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17393 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17394 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17395 // For regular numbers, rounding may cause the results to differ.
17396 // Therefore, we require { nsz ninf afn } for this transform.
17397 // TODO: We could select out the special cases if we don't have nsz/ninf.
17398 SDNodeFlags Flags = N->getFlags();
17399
17400 // We only need no signed zeros for the 0.25 case.
17401 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17402 !Flags.hasApproximateFuncs())
17403 return SDValue();
17404
17405 // Don't double the number of libcalls. We are trying to inline fast code.
17406 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17407 return SDValue();
17408
17409 // Assume that libcalls are the smallest code.
17410 // TODO: This restriction should probably be lifted for vectors.
17411 if (ForCodeSize)
17412 return SDValue();
17413
17414 // pow(X, 0.25) --> sqrt(sqrt(X))
17415 SDLoc DL(N);
17416 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17417 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17418 if (ExponentIs025)
17419 return SqrtSqrt;
17420 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17421 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17422 }
17423
17424 return SDValue();
17425}
17426
17427static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17428 const TargetLowering &TLI) {
17429 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17430 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17431 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17432 // conversions would return +0.0.
17433 // FIXME: We should be able to use node-level FMF here.
17434 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17435 EVT VT = N->getValueType(0);
17436 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17437 !DAG.getTarget().Options.NoSignedZerosFPMath)
17438 return SDValue();
17439
17440 // fptosi/fptoui round towards zero, so converting from FP to integer and
17441 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
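// Illustrative example (annotation, not from the source): (float)(int)3.7f
// is 3.0f, exactly truncf(3.7f), so the fptosi+sitofp pair becomes FTRUNC.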
17442 SDValue N0 = N->getOperand(0);
17443 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17444 N0.getOperand(0).getValueType() == VT)
17445 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17446
17447 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17448 N0.getOperand(0).getValueType() == VT)
17449 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17450
17451 return SDValue();
17452}
17453
17454SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17455 SDValue N0 = N->getOperand(0);
17456 EVT VT = N->getValueType(0);
17457 EVT OpVT = N0.getValueType();
17458
17459 // [us]itofp(undef) = 0, because the result value is bounded.
17460 if (N0.isUndef())
17461 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17462
17463 // fold (sint_to_fp c1) -> c1fp
17464 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17465 // ...but only if the target supports immediate floating-point values
17466 (!LegalOperations ||
17467 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17468 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17469
17470 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17471 // but UINT_TO_FP is legal on this target, try to convert.
17472 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17473 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17474 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17475 if (DAG.SignBitIsZero(N0))
17476 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17477 }
17478
17479 // The next optimizations are desirable only if SELECT_CC can be lowered.
17480 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17481 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17482 !VT.isVector() &&
17483 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17484 SDLoc DL(N);
17485 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17486 DAG.getConstantFP(0.0, DL, VT));
17487 }
17488
17489 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17490 // (select (setcc x, y, cc), 1.0, 0.0)
17491 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17492 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17493 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17494 SDLoc DL(N);
17495 return DAG.getSelect(DL, VT, N0.getOperand(0),
17496 DAG.getConstantFP(1.0, DL, VT),
17497 DAG.getConstantFP(0.0, DL, VT));
17498 }
17499
17500 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17501 return FTrunc;
17502
17503 return SDValue();
17504}
17505
17506SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17507 SDValue N0 = N->getOperand(0);
17508 EVT VT = N->getValueType(0);
17509 EVT OpVT = N0.getValueType();
17510
17511 // [us]itofp(undef) = 0, because the result value is bounded.
17512 if (N0.isUndef())
17513 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17514
17515 // fold (uint_to_fp c1) -> c1fp
17516 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17517 // ...but only if the target supports immediate floating-point values
17518 (!LegalOperations ||
17519 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17520 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17521
17522 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17523 // but SINT_TO_FP is legal on this target, try to convert.
17524 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17525 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17526 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17527 if (DAG.SignBitIsZero(N0))
17528 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17529 }
17530
17531 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17532 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17533 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17534 SDLoc DL(N);
17535 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17536 DAG.getConstantFP(0.0, DL, VT));
17537 }
17538
17539 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17540 return FTrunc;
17541
17542 return SDValue();
17543}
17544
17545// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17546static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17547 SDValue N0 = N->getOperand(0);
17548 EVT VT = N->getValueType(0);
17549
17550 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17551 return SDValue();
17552
17553 SDValue Src = N0.getOperand(0);
17554 EVT SrcVT = Src.getValueType();
17555 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17556 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17557
17558 // We can safely assume the conversion won't overflow the output range,
17559 // because (for example) (uint8_t)18293.f is undefined behavior.
17560
17561 // Since we can assume the conversion won't overflow, our decision as to
17562 // whether the input will fit in the float should depend on the minimum
17563 // of the input range and output range.
17564
17565 // This means this is also safe for a signed input and unsigned output, since
17566 // a negative input would lead to undefined behavior.
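// Illustrative example (annotation, not from the source): an i16 source
// converted through f32 (24-bit precision) gives ActualSize = 16 <= 24, so
// the round trip is exact and the conversions fold away; an i32 source with
// ActualSize = 32 does not fit and the fold is skipped.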
17567 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17568 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17569 unsigned ActualSize = std::min(InputSize, OutputSize);
17570 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17571
17572 // We can only fold away the float conversion if the input range can be
17573 // represented exactly in the float range.
17574 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17575 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17576 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17577 : ISD::ZERO_EXTEND;
17578 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17579 }
17580 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17581 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17582 return DAG.getBitcast(VT, Src);
17583 }
17584 return SDValue();
17585}
17586
17587SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17588 SDValue N0 = N->getOperand(0);
17589 EVT VT = N->getValueType(0);
17590
17591 // fold (fp_to_sint undef) -> undef
17592 if (N0.isUndef())
17593 return DAG.getUNDEF(VT);
17594
17595 // fold (fp_to_sint c1fp) -> c1
17596 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17597 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17598
17599 return FoldIntToFPToInt(N, DAG);
17600}
17601
17602SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17603 SDValue N0 = N->getOperand(0);
17604 EVT VT = N->getValueType(0);
17605
17606 // fold (fp_to_uint undef) -> undef
17607 if (N0.isUndef())
17608 return DAG.getUNDEF(VT);
17609
17610 // fold (fp_to_uint c1fp) -> c1
17611 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17612 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17613
17614 return FoldIntToFPToInt(N, DAG);
17615}
17616
17617SDValue DAGCombiner::visitXRINT(SDNode *N) {
17618 SDValue N0 = N->getOperand(0);
17619 EVT VT = N->getValueType(0);
17620
17621 // fold (lrint|llrint undef) -> undef
17622 if (N0.isUndef())
17623 return DAG.getUNDEF(VT);
17624
17625 // fold (lrint|llrint c1fp) -> c1
17626 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17627 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17628
17629 return SDValue();
17630}
17631
17632SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17633 SDValue N0 = N->getOperand(0);
17634 SDValue N1 = N->getOperand(1);
17635 EVT VT = N->getValueType(0);
17636
17637 // fold (fp_round c1fp) -> c1fp
17638 if (SDValue C =
17639 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17640 return C;
17641
17642 // fold (fp_round (fp_extend x)) -> x
17643 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17644 return N0.getOperand(0);
17645
17646 // fold (fp_round (fp_round x)) -> (fp_round x)
17647 if (N0.getOpcode() == ISD::FP_ROUND) {
17648 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17649 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17650
17651 // Avoid folding legal fp_rounds into non-legal ones.
17652 if (!hasOperation(ISD::FP_ROUND, VT))
17653 return SDValue();
17654
17655 // Skip this folding if it results in an fp_round from f80 to f16.
17656 //
17657 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17658 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17659 // instructions from f32 or f64. Moreover, the first (value-preserving)
17660 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17661 // x86.
17662 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17663 return SDValue();
17664
17665 // If the first fp_round isn't a value preserving truncation, it might
17666 // introduce a tie in the second fp_round, that wouldn't occur in the
17667 // single-step fp_round we want to fold to.
17668 // In other words, double rounding isn't the same as rounding.
17669 // Also, this is a value preserving truncation iff both fp_round's are.
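// Illustrative note (annotation, not from the source): rounding in two steps,
// e.g. f64 -> f32 -> f16, can differ from a single f64 -> f16 rounding when
// the first step lands exactly halfway between two f16 values, which is why
// the fold requires the inner round to be value-preserving (or UnsafeFPMath).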
17670 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17671 SDLoc DL(N);
17672 return DAG.getNode(
17673 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17674 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17675 }
17676 }
17677
17678 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17679 // Note: From a legality perspective, this is a two step transform. First,
17680 // we duplicate the fp_round to the arguments of the copysign, then we
17681 // eliminate the fp_round on Y. The second step requires an additional
17682 // predicate to match the implementation above.
17683 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17684 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17685 N0.getValueType())) {
17686 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17687 N0.getOperand(0), N1);
17688 AddToWorklist(Tmp.getNode());
17689 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17690 Tmp, N0.getOperand(1));
17691 }
17692
17693 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17694 return NewVSel;
17695
17696 return SDValue();
17697}
17698
17699SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17700 SDValue N0 = N->getOperand(0);
17701 EVT VT = N->getValueType(0);
17702
17703 if (VT.isVector())
17704 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17705 return FoldedVOp;
17706
17707 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17708 if (N->hasOneUse() &&
17709 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17710 return SDValue();
17711
17712 // fold (fp_extend c1fp) -> c1fp
17713 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17714 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17715
17716 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17717 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17718 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17719 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17720
17721 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17722 // value of X.
17723 if (N0.getOpcode() == ISD::FP_ROUND
17724 && N0.getConstantOperandVal(1) == 1) {
17725 SDValue In = N0.getOperand(0);
17726 if (In.getValueType() == VT) return In;
17727 if (VT.bitsLT(In.getValueType()))
17728 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17729 In, N0.getOperand(1));
17730 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17731 }
17732
17733 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17734 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17735 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17736 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17737 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17738 LN0->getChain(),
17739 LN0->getBasePtr(), N0.getValueType(),
17740 LN0->getMemOperand());
17741 CombineTo(N, ExtLoad);
17742 CombineTo(
17743 N0.getNode(),
17744 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17745 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17746 ExtLoad.getValue(1));
17747 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17748 }
17749
17750 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17751 return NewVSel;
17752
17753 return SDValue();
17754}
17755
17756SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17757 SDValue N0 = N->getOperand(0);
17758 EVT VT = N->getValueType(0);
17759
17760 // fold (fceil c1) -> fceil(c1)
17761 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17762 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17763
17764 return SDValue();
17765}
17766
17767SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17768 SDValue N0 = N->getOperand(0);
17769 EVT VT = N->getValueType(0);
17770
17771 // fold (ftrunc c1) -> ftrunc(c1)
17772 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17773 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17774
17775 // fold ftrunc (known rounded int x) -> x
17776 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17777 // likely to be generated to extract integer from a rounded floating value.
17778 switch (N0.getOpcode()) {
17779 default: break;
17780 case ISD::FRINT:
17781 case ISD::FTRUNC:
17782 case ISD::FNEARBYINT:
17783 case ISD::FROUNDEVEN:
17784 case ISD::FFLOOR:
17785 case ISD::FCEIL:
17786 return N0;
17787 }
17788
17789 return SDValue();
17790}
17791
17792SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17793 SDValue N0 = N->getOperand(0);
17794
17795 // fold (ffrexp c1) -> ffrexp(c1)
17796 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17797 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17798 return SDValue();
17799}
17800
17801SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17802 SDValue N0 = N->getOperand(0);
17803 EVT VT = N->getValueType(0);
17804
17805 // fold (ffloor c1) -> ffloor(c1)
17806 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17807 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17808
17809 return SDValue();
17810}
17811
17812SDValue DAGCombiner::visitFNEG(SDNode *N) {
17813 SDValue N0 = N->getOperand(0);
17814 EVT VT = N->getValueType(0);
17815 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17816
17817 // Constant fold FNEG.
17818 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17819 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17820
17821 if (SDValue NegN0 =
17822 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17823 return NegN0;
17824
17825 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17826 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17827 // know it was called from a context with a nsz flag if the input fsub does
17828 // not.
17829 if (N0.getOpcode() == ISD::FSUB &&
17830 (DAG.getTarget().Options.NoSignedZerosFPMath ||
17831 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17832 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17833 N0.getOperand(0));
17834 }
17835
17836 if (SDValue Cast = foldSignChangeInBitcast(N))
17837 return Cast;
17838
17839 return SDValue();
17840}
17841
17842SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17843 SDValue N0 = N->getOperand(0);
17844 SDValue N1 = N->getOperand(1);
17845 EVT VT = N->getValueType(0);
17846 const SDNodeFlags Flags = N->getFlags();
17847 unsigned Opc = N->getOpcode();
17848 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17849 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17850 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17851
17852 // Constant fold.
17853 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17854 return C;
17855
17856 // Canonicalize to constant on RHS.
17857 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17858 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17859 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17860
17861 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17862 const APFloat &AF = N1CFP->getValueAPF();
17863
17864 // minnum(X, nan) -> X
17865 // maxnum(X, nan) -> X
17866 // minimum(X, nan) -> nan
17867 // maximum(X, nan) -> nan
17868 if (AF.isNaN())
17869 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17870
17871 // In the following folds, inf can be replaced with the largest finite
17872 // float, if the ninf flag is set.
17873 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17874 // minnum(X, -inf) -> -inf
17875 // maxnum(X, +inf) -> +inf
17876 // minimum(X, -inf) -> -inf if nnan
17877 // maximum(X, +inf) -> +inf if nnan
17878 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17879 return N->getOperand(1);
17880
17881 // minnum(X, +inf) -> X if nnan
17882 // maxnum(X, -inf) -> X if nnan
17883 // minimum(X, +inf) -> X
17884 // maximum(X, -inf) -> X
17885 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17886 return N->getOperand(0);
17887 }
17888 }
17889
17890 if (SDValue SD = reassociateReduction(
17891 PropagatesNaN
17892 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17893 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17894 Opc, SDLoc(N), VT, N0, N1, Flags))
17895 return SD;
17896
17897 return SDValue();
17898}
17899
17900SDValue DAGCombiner::visitFABS(SDNode *N) {
17901 SDValue N0 = N->getOperand(0);
17902 EVT VT = N->getValueType(0);
17903
17904 // fold (fabs c1) -> fabs(c1)
17905 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17906 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17907
17908 // fold (fabs (fabs x)) -> (fabs x)
17909 if (N0.getOpcode() == ISD::FABS)
17910 return N->getOperand(0);
17911
17912 // fold (fabs (fneg x)) -> (fabs x)
17913 // fold (fabs (fcopysign x, y)) -> (fabs x)
17914 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17915 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17916
17917 if (SDValue Cast = foldSignChangeInBitcast(N))
17918 return Cast;
17919
17920 return SDValue();
17921}
17922
17923SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17924 SDValue Chain = N->getOperand(0);
17925 SDValue N1 = N->getOperand(1);
17926 SDValue N2 = N->getOperand(2);
17927
17928 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17929 // nondeterministic jumps).
17930 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17931 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17932 N1->getOperand(0), N2);
17933 }
17934
17935 // Variant of the previous fold where there is a SETCC in between:
17936 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
17937 // =>
17938 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17939 // =>
17940 // BRCOND(SETCC(X, CONST, Cond))
17941 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17942 // isn't equivalent to true or false.
17943 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17944 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17945 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17946 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17947 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17948 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17949 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17950 bool Updated = false;
17951
17952 // Is 'X Cond C' always true or false?
17953 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17954 bool False = (Cond == ISD::SETULT && C->isZero()) ||
17955 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17956 (Cond == ISD::SETUGT && C->isAllOnes()) ||
17957 (Cond == ISD::SETGT && C->isMaxSignedValue());
17958 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17959 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17960 (Cond == ISD::SETUGE && C->isZero()) ||
17961 (Cond == ISD::SETGE && C->isMinSignedValue());
17962 return True || False;
17963 };
17964
17965 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17966 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17967 S0 = S0->getOperand(0);
17968 Updated = true;
17969 }
17970 }
17971 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17972 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17973 S1 = S1->getOperand(0);
17974 Updated = true;
17975 }
17976 }
17977
17978 if (Updated)
17979 return DAG.getNode(
17980 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17981 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17982 }
17983
17984 // If N is a constant we could fold this into a fallthrough or unconditional
17985 // branch. However that doesn't happen very often in normal code, because
17986 // Instcombine/SimplifyCFG should have handled the available opportunities.
17987 // If we did this folding here, it would be necessary to update the
17988 // MachineBasicBlock CFG, which is awkward.
17989
17990 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
17991 // on the target.
17992 if (N1.getOpcode() == ISD::SETCC &&
17993 TLI.isOperationLegalOrCustom(ISD::BR_CC,
17994 N1.getOperand(0).getValueType())) {
17995 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
17996 Chain, N1.getOperand(2),
17997 N1.getOperand(0), N1.getOperand(1), N2);
17998 }
17999
18000 if (N1.hasOneUse()) {
18001 // rebuildSetCC calls visitXor which may change the Chain when there is a
18002 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18003 HandleSDNode ChainHandle(Chain);
18004 if (SDValue NewN1 = rebuildSetCC(N1))
18005 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18006 ChainHandle.getValue(), NewN1, N2);
18007 }
18008
18009 return SDValue();
18010}
18011
18012SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18013 if (N.getOpcode() == ISD::SRL ||
18014 (N.getOpcode() == ISD::TRUNCATE &&
18015 (N.getOperand(0).hasOneUse() &&
18016 N.getOperand(0).getOpcode() == ISD::SRL))) {
18017 // Look past the truncate.
18018 if (N.getOpcode() == ISD::TRUNCATE)
18019 N = N.getOperand(0);
18020
18021 // Match this pattern so that we can generate simpler code:
18022 //
18023 // %a = ...
18024 // %b = and i32 %a, 2
18025 // %c = srl i32 %b, 1
18026 // brcond i32 %c ...
18027 //
18028 // into
18029 //
18030 // %a = ...
18031 // %b = and i32 %a, 2
18032 // %c = setcc eq %b, 0
18033 // brcond %c ...
18034 //
18035 // This applies only when the AND constant value has one bit set and the
18036 // SRL constant is equal to the log2 of the AND constant. The back-end is
18037 // smart enough to convert the result into a TEST/JMP sequence.
18038 SDValue Op0 = N.getOperand(0);
18039 SDValue Op1 = N.getOperand(1);
18040
18041 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18042 SDValue AndOp1 = Op0.getOperand(1);
18043
18044 if (AndOp1.getOpcode() == ISD::Constant) {
18045 const APInt &AndConst = AndOp1->getAsAPIntVal();
18046
18047 if (AndConst.isPowerOf2() &&
18048 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18049 SDLoc DL(N);
18050 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18051 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18052 ISD::SETNE);
18053 }
18054 }
18055 }
18056 }
18057
18058 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18059 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18060 if (N.getOpcode() == ISD::XOR) {
18061 // Because we may call this on a speculatively constructed
18062 // SimplifiedSetCC Node, we need to simplify this node first.
18063 // Ideally this should be folded into SimplifySetCC and not
18064 // here. For now, grab a handle to N so we don't lose it from
18065 // replacements internal to the visit.
18066 HandleSDNode XORHandle(N);
18067 while (N.getOpcode() == ISD::XOR) {
18068 SDValue Tmp = visitXOR(N.getNode());
18069 // No simplification done.
18070 if (!Tmp.getNode())
18071 break;
18072 // Returning N is a form of in-visit replacement that may invalidate
18073 // N. Grab the value from the handle.
18074 if (Tmp.getNode() == N.getNode())
18075 N = XORHandle.getValue();
18076 else // Node simplified. Try simplifying again.
18077 N = Tmp;
18078 }
18079
18080 if (N.getOpcode() != ISD::XOR)
18081 return N;
18082
18083 SDValue Op0 = N->getOperand(0);
18084 SDValue Op1 = N->getOperand(1);
18085
18086 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18087 bool Equal = false;
18088 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18089 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18090 Op0.getValueType() == MVT::i1) {
18091 N = Op0;
18092 Op0 = N->getOperand(0);
18093 Op1 = N->getOperand(1);
18094 Equal = true;
18095 }
18096
18097 EVT SetCCVT = N.getValueType();
18098 if (LegalTypes)
18099 SetCCVT = getSetCCResultType(SetCCVT);
18100 // Replace the uses of XOR with SETCC
18101 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18102 Equal ? ISD::SETEQ : ISD::SETNE);
18103 }
18104 }
18105
18106 return SDValue();
18107}
18108
18109// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18110//
18111SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18112 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18113 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18114
18115 // If N is a constant we could fold this into a fallthrough or unconditional
18116 // branch. However that doesn't happen very often in normal code, because
18117 // Instcombine/SimplifyCFG should have handled the available opportunities.
18118 // If we did this folding here, it would be necessary to update the
18119 // MachineBasicBlock CFG, which is awkward.
18120
18121 // Use SimplifySetCC to simplify SETCC's.
18122  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18123                               CondLHS, CondRHS, CC->get(), SDLoc(N),
18124 false);
18125 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18126
18127 // fold to a simpler setcc
18128 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18129 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18130 N->getOperand(0), Simp.getOperand(2),
18131 Simp.getOperand(0), Simp.getOperand(1),
18132 N->getOperand(4));
18133
18134 return SDValue();
18135}
18136
18137static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18138 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18139 const TargetLowering &TLI) {
18140 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18141 if (LD->isIndexed())
18142 return false;
18143 EVT VT = LD->getMemoryVT();
18144 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18145 return false;
18146 Ptr = LD->getBasePtr();
18147 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18148 if (ST->isIndexed())
18149 return false;
18150 EVT VT = ST->getMemoryVT();
18151 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18152 return false;
18153 Ptr = ST->getBasePtr();
18154 IsLoad = false;
18155 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18156 if (LD->isIndexed())
18157 return false;
18158 EVT VT = LD->getMemoryVT();
18159 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18160 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18161 return false;
18162 Ptr = LD->getBasePtr();
18163 IsMasked = true;
18164 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18165 if (ST->isIndexed())
18166 return false;
18167 EVT VT = ST->getMemoryVT();
18168 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18169 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18170 return false;
18171 Ptr = ST->getBasePtr();
18172 IsLoad = false;
18173 IsMasked = true;
18174 } else {
18175 return false;
18176 }
18177 return true;
18178}
18179
18180/// Try turning a load/store into a pre-indexed load/store when the base
18181/// pointer is an add or subtract and it has other uses besides the load/store.
18182/// After the transformation, the new indexed load/store has effectively folded
18183/// the add/subtract in and all of its other uses are redirected to the
18184/// new load/store.
18185bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18186 if (Level < AfterLegalizeDAG)
18187 return false;
18188
18189 bool IsLoad = true;
18190 bool IsMasked = false;
18191 SDValue Ptr;
18192 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18193 Ptr, TLI))
18194 return false;
18195
18196 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18197 // out. There is no reason to make this a preinc/predec.
18198 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18199 Ptr->hasOneUse())
18200 return false;
18201
18202 // Ask the target to do addressing mode selection.
18203  SDValue BasePtr;
18204  SDValue Offset;
18205  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18206  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18207 return false;
18208
18209 // Backends without true r+i pre-indexed forms may need to pass a
18210 // constant base with a variable offset so that constant coercion
18211 // will work with the patterns in canonical form.
18212 bool Swapped = false;
18213 if (isa<ConstantSDNode>(BasePtr)) {
18214 std::swap(BasePtr, Offset);
18215 Swapped = true;
18216 }
18217
18218  // Don't create an indexed load / store with zero offset.
18219  if (isNullConstant(Offset))
18220    return false;
18221
18222 // Try turning it into a pre-indexed load / store except when:
18223 // 1) The new base ptr is a frame index.
18224 // 2) If N is a store and the new base ptr is either the same as or is a
18225 // predecessor of the value being stored.
18226 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18227 // that would create a cycle.
18228 // 4) All uses are load / store ops that use it as old base ptr.
18229
18230 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18231 // (plus the implicit offset) to a register to preinc anyway.
18232 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18233 return false;
18234
18235 // Check #2.
18236 if (!IsLoad) {
18237 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18238 : cast<StoreSDNode>(N)->getValue();
18239
18240 // Would require a copy.
18241 if (Val == BasePtr)
18242 return false;
18243
18244 // Would create a cycle.
18245 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18246 return false;
18247 }
18248
18249 // Caches for hasPredecessorHelper.
18250  SmallPtrSet<const SDNode *, 32> Visited;
18251  SmallVector<const SDNode *, 16> Worklist;
18252  Worklist.push_back(N);
18253
18254 // If the offset is a constant, there may be other adds of constants that
18255 // can be folded with this one. We should do this to avoid having to keep
18256 // a copy of the original base pointer.
18257 SmallVector<SDNode *, 16> OtherUses;
18258 constexpr unsigned int MaxSteps = 8192;
18259 if (isa<ConstantSDNode>(Offset))
18260 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18261 UE = BasePtr->use_end();
18262 UI != UE; ++UI) {
18263 SDUse &Use = UI.getUse();
18264 // Skip the use that is Ptr and uses of other results from BasePtr's
18265 // node (important for nodes that return multiple results).
18266 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18267 continue;
18268
18269 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18270 MaxSteps))
18271 continue;
18272
18273 if (Use.getUser()->getOpcode() != ISD::ADD &&
18274 Use.getUser()->getOpcode() != ISD::SUB) {
18275 OtherUses.clear();
18276 break;
18277 }
18278
18279 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18280 if (!isa<ConstantSDNode>(Op1)) {
18281 OtherUses.clear();
18282 break;
18283 }
18284
18285 // FIXME: In some cases, we can be smarter about this.
18286 if (Op1.getValueType() != Offset.getValueType()) {
18287 OtherUses.clear();
18288 break;
18289 }
18290
18291 OtherUses.push_back(Use.getUser());
18292 }
18293
18294 if (Swapped)
18295 std::swap(BasePtr, Offset);
18296
18297 // Now check for #3 and #4.
18298 bool RealUse = false;
18299
18300 for (SDNode *Use : Ptr->uses()) {
18301 if (Use == N)
18302 continue;
18303 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18304 return false;
18305
18306 // If Ptr may be folded in addressing mode of other use, then it's
18307 // not profitable to do this transformation.
18308 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18309 RealUse = true;
18310 }
18311
18312 if (!RealUse)
18313 return false;
18314
18315  SDValue Result;
18316  if (!IsMasked) {
18317 if (IsLoad)
18318 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18319 else
18320 Result =
18321 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18322 } else {
18323 if (IsLoad)
18324 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18325 Offset, AM);
18326 else
18327 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18328 Offset, AM);
18329 }
18330 ++PreIndexedNodes;
18331 ++NodesCombined;
18332 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18333 Result.dump(&DAG); dbgs() << '\n');
18334 WorklistRemover DeadNodes(*this);
18335 if (IsLoad) {
18336 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18337 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18338 } else {
18339 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18340 }
18341
18342 // Finally, since the node is now dead, remove it from the graph.
18343 deleteAndRecombine(N);
18344
18345 if (Swapped)
18346 std::swap(BasePtr, Offset);
18347
18348 // Replace other uses of BasePtr that can be updated to use Ptr
18349 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18350 unsigned OffsetIdx = 1;
18351 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18352 OffsetIdx = 0;
18353 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18354 BasePtr.getNode() && "Expected BasePtr operand");
18355
18356 // We need to replace ptr0 in the following expression:
18357 // x0 * offset0 + y0 * ptr0 = t0
18358 // knowing that
18359 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18360 //
18361 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18362 // indexed load/store and the expression that needs to be re-written.
18363 //
18364 // Therefore, we have:
18365    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
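    //
    // Worked instance (illustrative, not from the original source): for a
    // PRE_INC by 4 that was not swapped (x1 = y1 = 1, offset1 = 4) and another
    // use t0 = ptr0 + 12 (x0 = y0 = 1, offset0 = 12), this gives
    // t0 = (12 - 4) + t1 = t1 + 8, which indeed equals ptr0 + 12 since
    // t1 = ptr0 + 4.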
18366
18367 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18368 const APInt &Offset0 = CN->getAPIntValue();
18369 const APInt &Offset1 = Offset->getAsAPIntVal();
18370 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18371 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18372 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18373 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18374
18375 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18376
18377 APInt CNV = Offset0;
18378 if (X0 < 0) CNV = -CNV;
18379 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18380 else CNV = CNV - Offset1;
18381
18382 SDLoc DL(OtherUses[i]);
18383
18384 // We can now generate the new expression.
18385 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18386 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18387
18388 SDValue NewUse = DAG.getNode(Opcode,
18389 DL,
18390 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18391 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18392 deleteAndRecombine(OtherUses[i]);
18393 }
18394
18395 // Replace the uses of Ptr with uses of the updated base value.
18396 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18397 deleteAndRecombine(Ptr.getNode());
18398 AddToWorklist(Result.getNode());
18399
18400 return true;
18401}
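// Illustrative sketch of the pre-indexed rewrite above (assumes a target with
// r+i pre-indexed forms; the value names are made up):
//   t1 = add t0, 8
//   t2 = load t1
//   t3 = add t1, 12         ; other use of the incremented pointer
// becomes
//   t2, t1' = pre-indexed load t0, 8    ; t1' = t0 + 8
//   t3 = add t1', 12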
18402
18403static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18404                                   SDValue &BasePtr, SDValue &Offset,
18405                                   ISD::MemIndexedMode &AM,
18406                                   SelectionDAG &DAG,
18407 const TargetLowering &TLI) {
18408 if (PtrUse == N ||
18409 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18410 return false;
18411
18412 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18413 return false;
18414
18415  // Don't create an indexed load / store with zero offset.
18416  if (isNullConstant(Offset))
18417    return false;
18418
18419 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18420 return false;
18421
18422  SmallPtrSet<const SDNode *, 32> Visited;
18423  for (SDNode *Use : BasePtr->uses()) {
18424 if (Use == Ptr.getNode())
18425 continue;
18426
18427    // Say no if there's a later user which could perform the index instead.
18428 if (isa<MemSDNode>(Use)) {
18429 bool IsLoad = true;
18430 bool IsMasked = false;
18431 SDValue OtherPtr;
18432      if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18433                                   IsMasked, OtherPtr, TLI)) {
18434        SmallVector<const SDNode *, 2> Worklist;
18435        Worklist.push_back(Use);
18436 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18437 return false;
18438 }
18439 }
18440
18441 // If all the uses are load / store addresses, then don't do the
18442 // transformation.
18443 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18444 for (SDNode *UseUse : Use->uses())
18445 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18446 return false;
18447 }
18448 }
18449 return true;
18450}
18451
18452static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18453                                         bool &IsMasked, SDValue &Ptr,
18454                                         SDValue &BasePtr, SDValue &Offset,
18455                                         ISD::MemIndexedMode &AM,
18456                                         SelectionDAG &DAG,
18457                                         const TargetLowering &TLI) {
18458  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18459                                IsMasked, Ptr, TLI) ||
18460 Ptr->hasOneUse())
18461 return nullptr;
18462
18463 // Try turning it into a post-indexed load / store except when
18464 // 1) All uses are load / store ops that use it as base ptr (and
18465  //      it may be folded as addressing mode).
18466 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18467 // nor a successor of N. Otherwise, if Op is folded that would
18468 // create a cycle.
18469 for (SDNode *Op : Ptr->uses()) {
18470 // Check for #1.
18471 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18472 continue;
18473
18474 // Check for #2.
18475    SmallPtrSet<const SDNode *, 32> Visited;
18476    SmallVector<const SDNode *, 8> Worklist;
18477    constexpr unsigned int MaxSteps = 8192;
18478 // Ptr is predecessor to both N and Op.
18479 Visited.insert(Ptr.getNode());
18480 Worklist.push_back(N);
18481 Worklist.push_back(Op);
18482 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18483 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18484 return Op;
18485 }
18486 return nullptr;
18487}
18488
18489/// Try to combine a load/store with an add/sub of the base pointer node into a
18490/// post-indexed load/store. The transformation effectively folds the
18491/// add/subtract into the new indexed load/store, and all of its uses are
18492/// redirected to the new load/store.
18493bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18494 if (Level < AfterLegalizeDAG)
18495 return false;
18496
18497 bool IsLoad = true;
18498 bool IsMasked = false;
18499 SDValue Ptr;
18500  SDValue BasePtr;
18501  SDValue Offset;
18502  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18503  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18504 Offset, AM, DAG, TLI);
18505 if (!Op)
18506 return false;
18507
18508  SDValue Result;
18509  if (!IsMasked)
18510 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18511 Offset, AM)
18512 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18513 BasePtr, Offset, AM);
18514 else
18515 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18516 BasePtr, Offset, AM)
18517                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18518                                                BasePtr, Offset, AM);
18519 ++PostIndexedNodes;
18520 ++NodesCombined;
18521 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18522 Result.dump(&DAG); dbgs() << '\n');
18523 WorklistRemover DeadNodes(*this);
18524 if (IsLoad) {
18525 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18526 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18527 } else {
18528 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18529 }
18530
18531 // Finally, since the node is now dead, remove it from the graph.
18532 deleteAndRecombine(N);
18533
18534 // Replace the uses of Use with uses of the updated base value.
18535  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18536                                Result.getValue(IsLoad ? 1 : 0));
18537 deleteAndRecombine(Op);
18538 return true;
18539}
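// Illustrative sketch of the post-indexed rewrite above (assumes a target with
// post-indexed loads; the value names are made up):
//   t1 = load t0
//   t2 = add t0, 8          ; independent use of the old base pointer
// becomes
//   t1, t0' = post-indexed load t0, 8   ; t0' = t0 + 8
// and the uses of t2 are redirected to t0'.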
18540
18541/// Return the base-pointer arithmetic from an indexed \p LD.
18542SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18543 ISD::MemIndexedMode AM = LD->getAddressingMode();
18544 assert(AM != ISD::UNINDEXED);
18545 SDValue BP = LD->getOperand(1);
18546 SDValue Inc = LD->getOperand(2);
18547
18548 // Some backends use TargetConstants for load offsets, but don't expect
18549 // TargetConstants in general ADD nodes. We can convert these constants into
18550 // regular Constants (if the constant is not opaque).
18551  assert((Inc.getOpcode() != ISD::TargetConstant ||
18552          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18553 "Cannot split out indexing using opaque target constants");
18554 if (Inc.getOpcode() == ISD::TargetConstant) {
18555 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18556 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18557 ConstInc->getValueType(0));
18558 }
18559
18560 unsigned Opc =
18561 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18562 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18563}
18564
18565static inline ElementCount numVectorEltsOrZero(EVT T) {
18566  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18567}
18568
18569bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18570 EVT STType = Val.getValueType();
18571 EVT STMemType = ST->getMemoryVT();
18572 if (STType == STMemType)
18573 return true;
18574 if (isTypeLegal(STMemType))
18575 return false; // fail.
18576 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18577 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18578 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18579 return true;
18580 }
18581 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18582 STType.isInteger() && STMemType.isInteger()) {
18583 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18584 return true;
18585 }
18586 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18587 Val = DAG.getBitcast(STMemType, Val);
18588 return true;
18589 }
18590 return false; // fail.
18591}
18592
18593bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18594 EVT LDMemType = LD->getMemoryVT();
18595 EVT LDType = LD->getValueType(0);
18596 assert(Val.getValueType() == LDMemType &&
18597 "Attempting to extend value of non-matching type");
18598 if (LDType == LDMemType)
18599 return true;
18600 if (LDMemType.isInteger() && LDType.isInteger()) {
18601 switch (LD->getExtensionType()) {
18602 case ISD::NON_EXTLOAD:
18603 Val = DAG.getBitcast(LDType, Val);
18604 return true;
18605 case ISD::EXTLOAD:
18606 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18607 return true;
18608 case ISD::SEXTLOAD:
18609 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18610 return true;
18611 case ISD::ZEXTLOAD:
18612 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18613 return true;
18614 }
18615 }
18616 return false;
18617}
18618
18619StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18620 int64_t &Offset) {
18621 SDValue Chain = LD->getOperand(0);
18622
18623 // Look through CALLSEQ_START.
18624 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18625 Chain = Chain->getOperand(0);
18626
18627 StoreSDNode *ST = nullptr;
18628  SmallVector<SDValue, 8> Aliases;
18629  if (Chain.getOpcode() == ISD::TokenFactor) {
18630 // Look for unique store within the TokenFactor.
18631 for (SDValue Op : Chain->ops()) {
18632 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18633 if (!Store)
18634 continue;
18635 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18636 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18637 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18638 continue;
18639 // Make sure the store is not aliased with any nodes in TokenFactor.
18640 GatherAllAliases(Store, Chain, Aliases);
18641 if (Aliases.empty() ||
18642 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18643 ST = Store;
18644 break;
18645 }
18646 } else {
18647 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18648 if (Store) {
18649 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18650 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18651 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18652 ST = Store;
18653 }
18654 }
18655
18656 return ST;
18657}
18658
18659SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18660 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18661 return SDValue();
18662 SDValue Chain = LD->getOperand(0);
18663 int64_t Offset;
18664
18665 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18666 // TODO: Relax this restriction for unordered atomics (see D66309)
18667 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18668 return SDValue();
18669
18670 EVT LDType = LD->getValueType(0);
18671 EVT LDMemType = LD->getMemoryVT();
18672 EVT STMemType = ST->getMemoryVT();
18673 EVT STType = ST->getValue().getValueType();
18674
18675 // There are two cases to consider here:
18676 // 1. The store is fixed width and the load is scalable. In this case we
18677 // don't know at compile time if the store completely envelops the load
18678 // so we abandon the optimisation.
18679 // 2. The store is scalable and the load is fixed width. We could
18680 // potentially support a limited number of cases here, but there has been
18681 // no cost-benefit analysis to prove it's worth it.
18682 bool LdStScalable = LDMemType.isScalableVT();
18683 if (LdStScalable != STMemType.isScalableVT())
18684 return SDValue();
18685
18686 // If we are dealing with scalable vectors on a big endian platform the
18687 // calculation of offsets below becomes trickier, since we do not know at
18688 // compile time the absolute size of the vector. Until we've done more
18689 // analysis on big-endian platforms it seems better to bail out for now.
18690 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18691 return SDValue();
18692
18693 // Normalize for Endianness. After this Offset=0 will denote that the least
18694 // significant bit in the loaded value maps to the least significant bit in
18695  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18696 // n:th least significant byte of the stored value.
18697 int64_t OrigOffset = Offset;
18698 if (DAG.getDataLayout().isBigEndian())
18699 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18700 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18701 8 -
18702 Offset;
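  // Worked instance of the normalization above (illustrative): with a 64-bit
  // stored value, a 16-bit load and OrigOffset == 0 on a big-endian target,
  // Offset becomes (64 - 16) / 8 - 0 = 6, i.e. the loaded bytes are the 6th
  // and 7th least significant bytes of the stored value.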
18703
18704  // Check that the stored value covers all bits that are loaded.
18705 bool STCoversLD;
18706
18707 TypeSize LdMemSize = LDMemType.getSizeInBits();
18708 TypeSize StMemSize = STMemType.getSizeInBits();
18709 if (LdStScalable)
18710 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18711 else
18712 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18713 StMemSize.getFixedValue());
18714
18715 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18716 if (LD->isIndexed()) {
18717 // Cannot handle opaque target constants and we must respect the user's
18718 // request not to split indexes from loads.
18719 if (!canSplitIdx(LD))
18720 return SDValue();
18721 SDValue Idx = SplitIndexingFromLoad(LD);
18722 SDValue Ops[] = {Val, Idx, Chain};
18723 return CombineTo(LD, Ops, 3);
18724 }
18725 return CombineTo(LD, Val, Chain);
18726 };
18727
18728 if (!STCoversLD)
18729 return SDValue();
18730
18731 // Memory as copy space (potentially masked).
18732 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18733 // Simple case: Direct non-truncating forwarding
18734 if (LDType.getSizeInBits() == LdMemSize)
18735 return ReplaceLd(LD, ST->getValue(), Chain);
18736 // Can we model the truncate and extension with an and mask?
18737 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18738 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18739 // Mask to size of LDMemType
18740 auto Mask =
18741          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18742                                               StMemSize.getFixedValue()),
18743 SDLoc(ST), STType);
18744 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18745 return ReplaceLd(LD, Val, Chain);
18746 }
18747 }
18748
18749  // Handle some cases for big-endian that would be Offset 0 (and thus handled
18750  // above) on a little-endian target.
18751 SDValue Val = ST->getValue();
18752 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18753 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18754 !LDType.isVector() && isTypeLegal(STType) &&
18755 TLI.isOperationLegal(ISD::SRL, STType)) {
18756 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18757 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18758 Offset = 0;
18759 }
18760 }
18761
18762 // TODO: Deal with nonzero offset.
18763 if (LD->getBasePtr().isUndef() || Offset != 0)
18764 return SDValue();
18765  // Model necessary truncations / extensions.
18766 // Truncate Value To Stored Memory Size.
18767 do {
18768 if (!getTruncatedStoreValue(ST, Val))
18769 continue;
18770 if (!isTypeLegal(LDMemType))
18771 continue;
18772 if (STMemType != LDMemType) {
18773 // TODO: Support vectors? This requires extract_subvector/bitcast.
18774 if (!STMemType.isVector() && !LDMemType.isVector() &&
18775 STMemType.isInteger() && LDMemType.isInteger())
18776 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18777 else
18778 continue;
18779 }
18780 if (!extendLoadedValueToExtension(LD, Val))
18781 continue;
18782 return ReplaceLd(LD, Val, Chain);
18783 } while (false);
18784
18785 // On failure, cleanup dead nodes we may have created.
18786 if (Val->use_empty())
18787 deleteAndRecombine(Val.getNode());
18788 return SDValue();
18789}
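// Illustrative example of the forwarding above (little endian, Offset == 0;
// the pattern is made up, not taken from the original source):
//   truncstore i16 %v:i32, %p
//   %x:i32 = zextload i16, %p
// can be rewritten as %x = and i32 %v, 0xffff, making the load dead.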
18790
18791SDValue DAGCombiner::visitLOAD(SDNode *N) {
18792 LoadSDNode *LD = cast<LoadSDNode>(N);
18793 SDValue Chain = LD->getChain();
18794 SDValue Ptr = LD->getBasePtr();
18795
18796 // If load is not volatile and there are no uses of the loaded value (and
18797 // the updated indexed value in case of indexed loads), change uses of the
18798 // chain value into uses of the chain input (i.e. delete the dead load).
18799 // TODO: Allow this for unordered atomics (see D66309)
18800 if (LD->isSimple()) {
18801 if (N->getValueType(1) == MVT::Other) {
18802 // Unindexed loads.
18803 if (!N->hasAnyUseOfValue(0)) {
18804 // It's not safe to use the two value CombineTo variant here. e.g.
18805 // v1, chain2 = load chain1, loc
18806 // v2, chain3 = load chain2, loc
18807 // v3 = add v2, c
18808 // Now we replace use of chain2 with chain1. This makes the second load
18809 // isomorphic to the one we are deleting, and thus makes this load live.
18810 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18811 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18812 dbgs() << "\n");
18813 WorklistRemover DeadNodes(*this);
18814 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18815 AddUsersToWorklist(Chain.getNode());
18816 if (N->use_empty())
18817 deleteAndRecombine(N);
18818
18819 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18820 }
18821 } else {
18822 // Indexed loads.
18823 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18824
18825 // If this load has an opaque TargetConstant offset, then we cannot split
18826 // the indexing into an add/sub directly (that TargetConstant may not be
18827 // valid for a different type of node, and we cannot convert an opaque
18828 // target constant into a regular constant).
18829 bool CanSplitIdx = canSplitIdx(LD);
18830
18831 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18832 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18833 SDValue Index;
18834 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18835 Index = SplitIndexingFromLoad(LD);
18836 // Try to fold the base pointer arithmetic into subsequent loads and
18837 // stores.
18838 AddUsersToWorklist(N);
18839 } else
18840 Index = DAG.getUNDEF(N->getValueType(1));
18841 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18842 dbgs() << "\nWith: "; Undef.dump(&DAG);
18843 dbgs() << " and 2 other values\n");
18844 WorklistRemover DeadNodes(*this);
18845 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18846      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18847      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18848 deleteAndRecombine(N);
18849 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18850 }
18851 }
18852 }
18853
18854 // If this load is directly stored, replace the load value with the stored
18855 // value.
18856 if (auto V = ForwardStoreValueToDirectLoad(LD))
18857 return V;
18858
18859 // Try to infer better alignment information than the load already has.
18860 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18861 !LD->isAtomic()) {
18862 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18863 if (*Alignment > LD->getAlign() &&
18864 isAligned(*Alignment, LD->getSrcValueOffset())) {
18865 SDValue NewLoad = DAG.getExtLoad(
18866 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18867 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18868 LD->getMemOperand()->getFlags(), LD->getAAInfo());
18869 // NewLoad will always be N as we are only refining the alignment
18870 assert(NewLoad.getNode() == N);
18871 (void)NewLoad;
18872 }
18873 }
18874 }
18875
18876 if (LD->isUnindexed()) {
18877 // Walk up chain skipping non-aliasing memory nodes.
18878 SDValue BetterChain = FindBetterChain(LD, Chain);
18879
18880 // If there is a better chain.
18881 if (Chain != BetterChain) {
18882 SDValue ReplLoad;
18883
18884      // Replace the chain to avoid the dependency.
18885 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18886 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18887 BetterChain, Ptr, LD->getMemOperand());
18888 } else {
18889 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18890 LD->getValueType(0),
18891 BetterChain, Ptr, LD->getMemoryVT(),
18892 LD->getMemOperand());
18893 }
18894
18895 // Create token factor to keep old chain connected.
18896 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18897 MVT::Other, Chain, ReplLoad.getValue(1));
18898
18899 // Replace uses with load result and token factor
18900 return CombineTo(N, ReplLoad.getValue(0), Token);
18901 }
18902 }
18903
18904 // Try transforming N to an indexed load.
18905 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18906 return SDValue(N, 0);
18907
18908 // Try to slice up N to more direct loads if the slices are mapped to
18909 // different register banks or pairing can take place.
18910 if (SliceUpLoad(N))
18911 return SDValue(N, 0);
18912
18913 return SDValue();
18914}
18915
18916namespace {
18917
18918/// Helper structure used to slice a load in smaller loads.
18919/// Basically a slice is obtained from the following sequence:
18920/// Origin = load Ty1, Base
18921/// Shift = srl Ty1 Origin, CstTy Amount
18922/// Inst = trunc Shift to Ty2
18923///
18924/// Then, it will be rewritten into:
18925/// Slice = load SliceTy, Base + SliceOffset
18926/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
18927///
18928/// SliceTy is deduced from the number of bits that are actually used to
18929/// build Inst.
18930struct LoadedSlice {
18931 /// Helper structure used to compute the cost of a slice.
18932 struct Cost {
18933 /// Are we optimizing for code size.
18934 bool ForCodeSize = false;
18935
18936 /// Various cost.
18937 unsigned Loads = 0;
18938 unsigned Truncates = 0;
18939 unsigned CrossRegisterBanksCopies = 0;
18940 unsigned ZExts = 0;
18941 unsigned Shift = 0;
18942
18943 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18944
18945 /// Get the cost of one isolated slice.
18946 Cost(const LoadedSlice &LS, bool ForCodeSize)
18947 : ForCodeSize(ForCodeSize), Loads(1) {
18948 EVT TruncType = LS.Inst->getValueType(0);
18949 EVT LoadedType = LS.getLoadedType();
18950 if (TruncType != LoadedType &&
18951 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18952 ZExts = 1;
18953 }
18954
18955 /// Account for slicing gain in the current cost.
18956    /// Slicing provides a few gains, like removing a shift or a
18957    /// truncate. This method allows growing the cost of the original
18958 /// load with the gain from this slice.
18959 void addSliceGain(const LoadedSlice &LS) {
18960 // Each slice saves a truncate.
18961 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18962 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18963 ++Truncates;
18964 // If there is a shift amount, this slice gets rid of it.
18965 if (LS.Shift)
18966 ++Shift;
18967 // If this slice can merge a cross register bank copy, account for it.
18968 if (LS.canMergeExpensiveCrossRegisterBankCopy())
18969 ++CrossRegisterBanksCopies;
18970 }
18971
18972 Cost &operator+=(const Cost &RHS) {
18973 Loads += RHS.Loads;
18974 Truncates += RHS.Truncates;
18975 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18976 ZExts += RHS.ZExts;
18977 Shift += RHS.Shift;
18978 return *this;
18979 }
18980
18981 bool operator==(const Cost &RHS) const {
18982 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18983 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18984 ZExts == RHS.ZExts && Shift == RHS.Shift;
18985 }
18986
18987 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
18988
18989 bool operator<(const Cost &RHS) const {
18990 // Assume cross register banks copies are as expensive as loads.
18991 // FIXME: Do we want some more target hooks?
18992 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
18993 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
18994 // Unless we are optimizing for code size, consider the
18995 // expensive operation first.
18996 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
18997 return ExpensiveOpsLHS < ExpensiveOpsRHS;
18998 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
18999 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19000 }
19001
19002 bool operator>(const Cost &RHS) const { return RHS < *this; }
19003
19004 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19005
19006 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19007 };
19008
19009  // The last instruction that represents the slice. This should be a
19010 // truncate instruction.
19011 SDNode *Inst;
19012
19013 // The original load instruction.
19014 LoadSDNode *Origin;
19015
19016 // The right shift amount in bits from the original load.
19017 unsigned Shift;
19018
19019  // The DAG that Origin came from.
19020 // This is used to get some contextual information about legal types, etc.
19021 SelectionDAG *DAG;
19022
19023 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19024 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19025 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19026
19027 /// Get the bits used in a chunk of bits \p BitWidth large.
19028 /// \return Result is \p BitWidth and has used bits set to 1 and
19029 /// not used bits set to 0.
19030 APInt getUsedBits() const {
19031 // Reproduce the trunc(lshr) sequence:
19032 // - Start from the truncated value.
19033 // - Zero extend to the desired bit width.
19034 // - Shift left.
19035 assert(Origin && "No original load to compare against.");
19036 unsigned BitWidth = Origin->getValueSizeInBits(0);
19037 assert(Inst && "This slice is not bound to an instruction");
19038 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19039 "Extracted slice is bigger than the whole type!");
19040 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19041 UsedBits.setAllBits();
19042 UsedBits = UsedBits.zext(BitWidth);
19043 UsedBits <<= Shift;
19044 return UsedBits;
19045 }
19046
19047 /// Get the size of the slice to be loaded in bytes.
19048 unsigned getLoadedSize() const {
19049 unsigned SliceSize = getUsedBits().popcount();
19050 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19051 return SliceSize / 8;
19052 }
19053
19054 /// Get the type that will be loaded for this slice.
19055 /// Note: This may not be the final type for the slice.
19056 EVT getLoadedType() const {
19057 assert(DAG && "Missing context");
19058 LLVMContext &Ctxt = *DAG->getContext();
19059 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19060 }
19061
19062 /// Get the alignment of the load used for this slice.
19063 Align getAlign() const {
19064 Align Alignment = Origin->getAlign();
19065 uint64_t Offset = getOffsetFromBase();
19066 if (Offset != 0)
19067 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19068 return Alignment;
19069 }
19070
19071 /// Check if this slice can be rewritten with legal operations.
19072 bool isLegal() const {
19073 // An invalid slice is not legal.
19074 if (!Origin || !Inst || !DAG)
19075 return false;
19076
19077 // Offsets are for indexed load only, we do not handle that.
19078 if (!Origin->getOffset().isUndef())
19079 return false;
19080
19081 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19082
19083 // Check that the type is legal.
19084 EVT SliceType = getLoadedType();
19085 if (!TLI.isTypeLegal(SliceType))
19086 return false;
19087
19088 // Check that the load is legal for this type.
19089 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19090 return false;
19091
19092 // Check that the offset can be computed.
19093 // 1. Check its type.
19094 EVT PtrType = Origin->getBasePtr().getValueType();
19095 if (PtrType == MVT::Untyped || PtrType.isExtended())
19096 return false;
19097
19098 // 2. Check that it fits in the immediate.
19099 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19100 return false;
19101
19102 // 3. Check that the computation is legal.
19103 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19104 return false;
19105
19106 // Check that the zext is legal if it needs one.
19107 EVT TruncateType = Inst->getValueType(0);
19108 if (TruncateType != SliceType &&
19109 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19110 return false;
19111
19112 return true;
19113 }
19114
19115 /// Get the offset in bytes of this slice in the original chunk of
19116 /// bits.
19117 /// \pre DAG != nullptr.
19118 uint64_t getOffsetFromBase() const {
19119 assert(DAG && "Missing context.");
19120 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19121 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19122 uint64_t Offset = Shift / 8;
19123 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19124 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19125 "The size of the original loaded type is not a multiple of a"
19126 " byte.");
19127 // If Offset is bigger than TySizeInBytes, it means we are loading all
19128 // zeros. This should have been optimized before in the process.
19129 assert(TySizeInBytes > Offset &&
19130 "Invalid shift amount for given loaded size");
19131 if (IsBigEndian)
19132 Offset = TySizeInBytes - Offset - getLoadedSize();
19133 return Offset;
19134 }
19135
19136 /// Generate the sequence of instructions to load the slice
19137 /// represented by this object and redirect the uses of this slice to
19138 /// this new sequence of instructions.
19139 /// \pre this->Inst && this->Origin are valid Instructions and this
19140 /// object passed the legal check: LoadedSlice::isLegal returned true.
19141 /// \return The last instruction of the sequence used to load the slice.
19142 SDValue loadSlice() const {
19143 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19144 const SDValue &OldBaseAddr = Origin->getBasePtr();
19145 SDValue BaseAddr = OldBaseAddr;
19146 // Get the offset in that chunk of bytes w.r.t. the endianness.
19147 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19148 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19149 if (Offset) {
19150 // BaseAddr = BaseAddr + Offset.
19151 EVT ArithType = BaseAddr.getValueType();
19152 SDLoc DL(Origin);
19153 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19154 DAG->getConstant(Offset, DL, ArithType));
19155 }
19156
19157 // Create the type of the loaded slice according to its size.
19158 EVT SliceType = getLoadedType();
19159
19160 // Create the load for the slice.
19161 SDValue LastInst =
19162 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19163                     Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19164                     Origin->getMemOperand()->getFlags());
19165 // If the final type is not the same as the loaded type, this means that
19166 // we have to pad with zero. Create a zero extend for that.
19167 EVT FinalType = Inst->getValueType(0);
19168 if (SliceType != FinalType)
19169 LastInst =
19170 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19171 return LastInst;
19172 }
19173
19174 /// Check if this slice can be merged with an expensive cross register
19175 /// bank copy. E.g.,
19176 /// i = load i32
19177 /// f = bitcast i32 i to float
19178 bool canMergeExpensiveCrossRegisterBankCopy() const {
19179 if (!Inst || !Inst->hasOneUse())
19180 return false;
19181 SDNode *Use = *Inst->use_begin();
19182 if (Use->getOpcode() != ISD::BITCAST)
19183 return false;
19184 assert(DAG && "Missing context");
19185 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19186 EVT ResVT = Use->getValueType(0);
19187 const TargetRegisterClass *ResRC =
19188 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19189 const TargetRegisterClass *ArgRC =
19190 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19191 Use->getOperand(0)->isDivergent());
19192 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19193 return false;
19194
19195 // At this point, we know that we perform a cross-register-bank copy.
19196 // Check if it is expensive.
19197    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19198    // Assume bitcasts are cheap, unless both register classes do not
19199 // explicitly share a common sub class.
19200 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19201 return false;
19202
19203 // Check if it will be merged with the load.
19204 // 1. Check the alignment / fast memory access constraint.
19205 unsigned IsFast = 0;
19206 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19207 Origin->getAddressSpace(), getAlign(),
19208 Origin->getMemOperand()->getFlags(), &IsFast) ||
19209 !IsFast)
19210 return false;
19211
19212 // 2. Check that the load is a legal operation for that type.
19213 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19214 return false;
19215
19216 // 3. Check that we do not have a zext in the way.
19217 if (Inst->getValueType(0) != getLoadedType())
19218 return false;
19219
19220 return true;
19221 }
19222};
19223
19224} // end anonymous namespace
19225
19226/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19227/// \p UsedBits looks like 0..0 1..1 0..0.
19228static bool areUsedBitsDense(const APInt &UsedBits) {
19229 // If all the bits are one, this is dense!
19230 if (UsedBits.isAllOnes())
19231 return true;
19232
19233 // Get rid of the unused bits on the right.
19234 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19235 // Get rid of the unused bits on the left.
19236 if (NarrowedUsedBits.countl_zero())
19237 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19238 // Check that the chunk of bits is completely used.
19239 return NarrowedUsedBits.isAllOnes();
19240}
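// For example (illustrative values): 0x00ffff00 is dense, since the set bits
// form a single run, while 0xff0000ff is not, because the set bits are split
// into two separate runs.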
19241
19242/// Check whether or not \p First and \p Second are next to each other
19243/// in memory. This means that there is no hole between the bits loaded
19244/// by \p First and the bits loaded by \p Second.
19245static bool areSlicesNextToEachOther(const LoadedSlice &First,
19246 const LoadedSlice &Second) {
19247 assert(First.Origin == Second.Origin && First.Origin &&
19248 "Unable to match different memory origins.");
19249 APInt UsedBits = First.getUsedBits();
19250 assert((UsedBits & Second.getUsedBits()) == 0 &&
19251 "Slices are not supposed to overlap.");
19252 UsedBits |= Second.getUsedBits();
19253 return areUsedBitsDense(UsedBits);
19254}
19255
19256/// Adjust the \p GlobalLSCost according to the target
19257/// pairing capabilities and the layout of the slices.
19258/// \pre \p GlobalLSCost should account for at least as many loads as
19259/// there are in the slices in \p LoadedSlices.
19260static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19261                                 LoadedSlice::Cost &GlobalLSCost) {
19262 unsigned NumberOfSlices = LoadedSlices.size();
19263  // If there are fewer than 2 elements, no pairing is possible.
19264 if (NumberOfSlices < 2)
19265 return;
19266
19267 // Sort the slices so that elements that are likely to be next to each
19268 // other in memory are next to each other in the list.
19269 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19270 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19271 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19272 });
19273 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19274  // First (resp. Second) is the first (resp. second) potential candidate
19275 // to be placed in a paired load.
19276 const LoadedSlice *First = nullptr;
19277 const LoadedSlice *Second = nullptr;
19278 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19279 // Set the beginning of the pair.
19280 First = Second) {
19281 Second = &LoadedSlices[CurrSlice];
19282
19283 // If First is NULL, it means we start a new pair.
19284 // Get to the next slice.
19285 if (!First)
19286 continue;
19287
19288 EVT LoadedType = First->getLoadedType();
19289
19290 // If the types of the slices are different, we cannot pair them.
19291 if (LoadedType != Second->getLoadedType())
19292 continue;
19293
19294 // Check if the target supplies paired loads for this type.
19295 Align RequiredAlignment;
19296 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19297 // move to the next pair, this type is hopeless.
19298 Second = nullptr;
19299 continue;
19300 }
19301 // Check if we meet the alignment requirement.
19302 if (First->getAlign() < RequiredAlignment)
19303 continue;
19304
19305 // Check that both loads are next to each other in memory.
19306 if (!areSlicesNextToEachOther(*First, *Second))
19307 continue;
19308
19309 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19310 --GlobalLSCost.Loads;
19311 // Move to the next pair.
19312 Second = nullptr;
19313 }
19314}
19315
19316/// Check the profitability of all involved LoadedSlice.
19317/// Currently, it is considered profitable if there are exactly two
19318/// involved slices (1) which are (2) next to each other in memory, and
19319/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19320///
19321/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19322/// the elements themselves.
19323///
19324/// FIXME: When the cost model is mature enough, we can relax
19325/// constraints (1) and (2).
19326static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19327                                const APInt &UsedBits, bool ForCodeSize) {
19328  unsigned NumberOfSlices = LoadedSlices.size();
19329  if (StressLoadSlicing)
19330    return NumberOfSlices > 1;
19331
19332 // Check (1).
19333 if (NumberOfSlices != 2)
19334 return false;
19335
19336 // Check (2).
19337 if (!areUsedBitsDense(UsedBits))
19338 return false;
19339
19340 // Check (3).
19341 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19342 // The original code has one big load.
19343 OrigCost.Loads = 1;
19344 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19345 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19346 // Accumulate the cost of all the slices.
19347 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19348 GlobalSlicingCost += SliceCost;
19349
19350 // Account as cost in the original configuration the gain obtained
19351 // with the current slices.
19352 OrigCost.addSliceGain(LS);
19353 }
19354
19355 // If the target supports paired load, adjust the cost accordingly.
19356 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19357 return OrigCost > GlobalSlicingCost;
19358}
19359
19360/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19361/// operations, split it into the various pieces being extracted.
19362///
19363/// This sort of thing is introduced by SROA.
19364/// This slicing takes care not to insert overlapping loads.
19365/// \pre LI is a simple load (i.e., not an atomic or volatile load).
19366bool DAGCombiner::SliceUpLoad(SDNode *N) {
19367 if (Level < AfterLegalizeDAG)
19368 return false;
19369
19370 LoadSDNode *LD = cast<LoadSDNode>(N);
19371 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19372 !LD->getValueType(0).isInteger())
19373 return false;
19374
19375 // The algorithm to split up a load of a scalable vector into individual
19376 // elements currently requires knowing the length of the loaded type,
19377 // so will need adjusting to work on scalable vectors.
19378 if (LD->getValueType(0).isScalableVector())
19379 return false;
19380
19381 // Keep track of already used bits to detect overlapping values.
19382 // In that case, we will just abort the transformation.
19383 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19384
19385 SmallVector<LoadedSlice, 4> LoadedSlices;
19386
19387 // Check if this load is used as several smaller chunks of bits.
19388 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19389 // of computation for each trunc.
19390 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19391 UI != UIEnd; ++UI) {
19392 // Skip the uses of the chain.
19393 if (UI.getUse().getResNo() != 0)
19394 continue;
19395
19396 SDNode *User = *UI;
19397 unsigned Shift = 0;
19398
19399 // Check if this is a trunc(lshr).
19400 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19401 isa<ConstantSDNode>(User->getOperand(1))) {
19402 Shift = User->getConstantOperandVal(1);
19403 User = *User->use_begin();
19404 }
19405
19406    // At this point, User is a TRUNCATE if we encountered a trunc or
19407 // trunc(lshr).
19408 if (User->getOpcode() != ISD::TRUNCATE)
19409 return false;
19410
19411    // The width of the type must be a power of 2 and at least 8 bits.
19412 // Otherwise the load cannot be represented in LLVM IR.
19413 // Moreover, if we shifted with a non-8-bits multiple, the slice
19414 // will be across several bytes. We do not support that.
19415 unsigned Width = User->getValueSizeInBits(0);
19416 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19417 return false;
19418
19419 // Build the slice for this chain of computations.
19420 LoadedSlice LS(User, LD, Shift, &DAG);
19421 APInt CurrentUsedBits = LS.getUsedBits();
19422
19423 // Check if this slice overlaps with another.
19424 if ((CurrentUsedBits & UsedBits) != 0)
19425 return false;
19426 // Update the bits used globally.
19427 UsedBits |= CurrentUsedBits;
19428
19429 // Check if the new slice would be legal.
19430 if (!LS.isLegal())
19431 return false;
19432
19433 // Record the slice.
19434 LoadedSlices.push_back(LS);
19435 }
19436
19437 // Abort slicing if it does not seem to be profitable.
19438 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19439 return false;
19440
19441 ++SlicedLoads;
19442
19443 // Rewrite each chain to use an independent load.
19444 // By construction, each chain can be represented by a unique load.
19445
19446 // Prepare the argument for the new token factor for all the slices.
19447 SmallVector<SDValue, 8> ArgChains;
19448 for (const LoadedSlice &LS : LoadedSlices) {
19449 SDValue SliceInst = LS.loadSlice();
19450 CombineTo(LS.Inst, SliceInst, true);
19451 if (SliceInst.getOpcode() != ISD::LOAD)
19452 SliceInst = SliceInst.getOperand(0);
19453 assert(SliceInst->getOpcode() == ISD::LOAD &&
19454 "It takes more than a zext to get to the loaded slice!!");
19455 ArgChains.push_back(SliceInst.getValue(1));
19456 }
19457
19458 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19459 ArgChains);
19460 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19461 AddToWorklist(Chain.getNode());
19462 return true;
19463}
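// Illustrative end-to-end example (not from the original source): a pattern
// like
//   %w  = load i32, %p
//   %lo = trunc i32 %w to i16
//   %hi = trunc i32 (lshr i32 %w, 16) to i16
// can be sliced into two independent i16 loads at %p and %p + 2 (little
// endian), provided the profitability checks above accept it.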
19464
19465/// Check to see if V is (and load (ptr), imm), where the load has
19466/// specific bytes cleared out. If so, return the byte size being masked out
19467/// and the shift amount.
19468static std::pair<unsigned, unsigned>
19469CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19470  std::pair<unsigned, unsigned> Result(0, 0);
19471
19472 // Check for the structure we're looking for.
19473 if (V->getOpcode() != ISD::AND ||
19474 !isa<ConstantSDNode>(V->getOperand(1)) ||
19475 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19476 return Result;
19477
19478 // Check the chain and pointer.
19479 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19480 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19481
19482 // This only handles simple types.
19483 if (V.getValueType() != MVT::i16 &&
19484 V.getValueType() != MVT::i32 &&
19485 V.getValueType() != MVT::i64)
19486 return Result;
19487
19488  // Check the constant mask. Invert it so that the bits being masked out are
19489  // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
19490 // follow the sign bit for uniformity.
19491 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19492 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19493 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19494 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19495 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19496 if (NotMaskLZ == 64) return Result; // All zero mask.
19497
19498 // See if we have a continuous run of bits. If so, we have 0*1+0*
19499 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19500 return Result;
19501
19502 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19503 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19504 NotMaskLZ -= 64-V.getValueSizeInBits();
19505
19506 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19507 switch (MaskedBytes) {
19508 case 1:
19509 case 2:
19510 case 4: break;
19511 default: return Result; // All one mask, or 5-byte mask.
19512 }
19513
19514 // Verify that the first bit starts at a multiple of mask so that the access
19515 // is aligned the same as the access width.
19516 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19517
19518  // For narrowing to be valid, it must be the case that the load is the
19519  // memory operation immediately preceding the store.
19520 if (LD == Chain.getNode())
19521 ; // ok.
19522 else if (Chain->getOpcode() == ISD::TokenFactor &&
19523 SDValue(LD, 1).hasOneUse()) {
19524    // LD has only 1 chain use, so there are no indirect dependencies.
19525 if (!LD->isOperandOf(Chain.getNode()))
19526 return Result;
19527 } else
19528 return Result; // Fail.
19529
19530 Result.first = MaskedBytes;
19531 Result.second = NotMaskTZ/8;
19532 return Result;
19533}
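// Illustrative example (values are made up): for an i32 value
//   %v = load i32, %p
//   V  = and i32 %v, 0xFFFF00FF
// the inverted mask is 0x0000FF00, a single byte-aligned run, so the result
// is {1, 1}: one byte is masked out, starting one byte above the base.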
19534
19535/// Check to see if IVal is something that provides a value as specified by
19536/// MaskInfo. If so, replace the specified store with a narrower store of
19537/// truncated IVal.
19538static SDValue
19539ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19540 SDValue IVal, StoreSDNode *St,
19541 DAGCombiner *DC) {
19542 unsigned NumBytes = MaskInfo.first;
19543 unsigned ByteShift = MaskInfo.second;
19544 SelectionDAG &DAG = DC->getDAG();
19545
19546 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19547 // that uses this. If not, this is not a replacement.
19548 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19549 ByteShift*8, (ByteShift+NumBytes)*8);
19550 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19551
19552 // Check that it is legal on the target to do this. It is legal if the new
19553 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19554 // legalization. If the source type is legal, but the store type isn't, see
19555 // if we can use a truncating store.
19556 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19557 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19558 bool UseTruncStore;
19559 if (DC->isTypeLegal(VT))
19560 UseTruncStore = false;
19561 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19562 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19563 UseTruncStore = true;
19564 else
19565 return SDValue();
19566
19567 // Can't do this for indexed stores.
19568 if (St->isIndexed())
19569 return SDValue();
19570
19571 // Check that the target doesn't think this is a bad idea.
19572 if (St->getMemOperand() &&
19573 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19574 *St->getMemOperand()))
19575 return SDValue();
19576
19577 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19578 // shifted by ByteShift and truncated down to NumBytes.
19579 if (ByteShift) {
19580 SDLoc DL(IVal);
19581 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19582 DAG.getConstant(ByteShift*8, DL,
19583 DC->getShiftAmountTy(IVal.getValueType())));
19584 }
19585
19586 // Figure out the offset for the store and the alignment of the access.
19587 unsigned StOffset;
19588 if (DAG.getDataLayout().isLittleEndian())
19589 StOffset = ByteShift;
19590 else
19591 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19592
19593 SDValue Ptr = St->getBasePtr();
19594 if (StOffset) {
19595 SDLoc DL(IVal);
19596    Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19597  }
19598
19599 ++OpsNarrowed;
19600 if (UseTruncStore)
19601 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19602 St->getPointerInfo().getWithOffset(StOffset),
19603 VT, St->getOriginalAlign());
19604
19605 // Truncate down to the new size.
19606 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19607
19608 return DAG
19609 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19610 St->getPointerInfo().getWithOffset(StOffset),
19611 St->getOriginalAlign());
19612}
19613
19614/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19615/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19616/// narrowing the load and store if it would end up being a win for performance
19617/// or code size.
19618SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19619 StoreSDNode *ST = cast<StoreSDNode>(N);
19620 if (!ST->isSimple())
19621 return SDValue();
19622
19623 SDValue Chain = ST->getChain();
19624 SDValue Value = ST->getValue();
19625 SDValue Ptr = ST->getBasePtr();
19626 EVT VT = Value.getValueType();
19627
19628 if (ST->isTruncatingStore() || VT.isVector())
19629 return SDValue();
19630
19631 unsigned Opc = Value.getOpcode();
19632
19633 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19634 !Value.hasOneUse())
19635 return SDValue();
19636
19637 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19638 // is a byte mask indicating a consecutive number of bytes, check to see if
19639 // Y is known to provide just those bytes. If so, we try to replace the
19640 // load + replace + store sequence with a single (narrower) store, which makes
19641 // the load dead.
19642 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19643 std::pair<unsigned, unsigned> MaskedLoad;
19644 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19645 if (MaskedLoad.first)
19646 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19647 Value.getOperand(1), ST,this))
19648 return NewST;
19649
19650 // Or is commutative, so try swapping X and Y.
19651 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19652 if (MaskedLoad.first)
19653 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19654 Value.getOperand(0), ST,this))
19655 return NewST;
19656 }
19657
19658 if (!EnableReduceLoadOpStoreWidth)
19659 return SDValue();
19660
19661 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19662 return SDValue();
19663
19664 SDValue N0 = Value.getOperand(0);
19665 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19666 Chain == SDValue(N0.getNode(), 1)) {
19667 LoadSDNode *LD = cast<LoadSDNode>(N0);
19668 if (LD->getBasePtr() != Ptr ||
19669 LD->getPointerInfo().getAddrSpace() !=
19670 ST->getPointerInfo().getAddrSpace())
19671 return SDValue();
19672
19673 // Find the type to narrow the load / op / store to.
19674 SDValue N1 = Value.getOperand(1);
19675 unsigned BitWidth = N1.getValueSizeInBits();
19676 APInt Imm = N1->getAsAPIntVal();
19677 if (Opc == ISD::AND)
19678 Imm ^= APInt::getAllOnes(BitWidth);
19679 if (Imm == 0 || Imm.isAllOnes())
19680 return SDValue();
19681 unsigned ShAmt = Imm.countr_zero();
19682 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19683 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19684 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19685 // The narrowing should be profitable, the load/store operation should be
19686 // legal (or custom) and the store size should be equal to the NewVT width.
19687 while (NewBW < BitWidth &&
19688 (NewVT.getStoreSizeInBits() != NewBW ||
19689 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19690 !TLI.isNarrowingProfitable(VT, NewVT))) {
19691 NewBW = NextPowerOf2(NewBW);
19692 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19693 }
19694 if (NewBW >= BitWidth)
19695 return SDValue();
19696
19697 // If the lowest changed bit does not fall on a NewBW-bit boundary,
19698 // start at the previous boundary.
19699 if (ShAmt % NewBW)
19700 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19701 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19702 std::min(BitWidth, ShAmt + NewBW));
19703 if ((Imm & Mask) == Imm) {
19704 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19705 if (Opc == ISD::AND)
19706 NewImm ^= APInt::getAllOnes(NewBW);
19707 uint64_t PtrOff = ShAmt / 8;
19708 // For big endian targets, we need to adjust the offset to the pointer to
19709 // load the correct bytes.
19710 if (DAG.getDataLayout().isBigEndian())
19711 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19712
19713 unsigned IsFast = 0;
19714 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19715 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19716 LD->getAddressSpace(), NewAlign,
19717 LD->getMemOperand()->getFlags(), &IsFast) ||
19718 !IsFast)
19719 return SDValue();
19720
19721 SDValue NewPtr =
19722 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19723 SDValue NewLD =
19724 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19725 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19726 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19727 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19728 DAG.getConstant(NewImm, SDLoc(Value),
19729 NewVT));
19730 SDValue NewST =
19731 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19732 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19733
19734 AddToWorklist(NewPtr.getNode());
19735 AddToWorklist(NewLD.getNode());
19736 AddToWorklist(NewVal.getNode());
19737 WorklistRemover DeadNodes(*this);
19738 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19739 ++OpsNarrowed;
19740 return NewST;
19741 }
19742 }
19743
19744 return SDValue();
19745}
19746
19747/// For a given floating point load / store pair, if the load value isn't used
19748/// by any other operations, then consider transforming the pair to integer
19749/// load / store operations if the target deems the transformation profitable.
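///
/// A rough illustration (not from the original comments): copying a double via
///   %f = load double, ptr %src ; store double %f, ptr %dst
/// where %f has no other users may become an i64 load/store pair, avoiding a
/// round trip through the FP register file, when the target reports the
/// integer ops as legal, desirable, and fast.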
19750SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19751 StoreSDNode *ST = cast<StoreSDNode>(N);
19752 SDValue Value = ST->getValue();
19753 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19754 Value.hasOneUse()) {
19755 LoadSDNode *LD = cast<LoadSDNode>(Value);
19756 EVT VT = LD->getMemoryVT();
19757 if (!VT.isFloatingPoint() ||
19758 VT != ST->getMemoryVT() ||
19759 LD->isNonTemporal() ||
19760 ST->isNonTemporal() ||
19761 LD->getPointerInfo().getAddrSpace() != 0 ||
19762 ST->getPointerInfo().getAddrSpace() != 0)
19763 return SDValue();
19764
19765 TypeSize VTSize = VT.getSizeInBits();
19766
19767 // We don't know the size of scalable types at compile time so we cannot
19768 // create an integer of the equivalent size.
19769 if (VTSize.isScalable())
19770 return SDValue();
19771
19772 unsigned FastLD = 0, FastST = 0;
19773 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19774 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19775 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19776 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19777 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19778 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19779 *LD->getMemOperand(), &FastLD) ||
19780 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19781 *ST->getMemOperand(), &FastST) ||
19782 !FastLD || !FastST)
19783 return SDValue();
19784
19785 SDValue NewLD =
19786 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19787 LD->getPointerInfo(), LD->getAlign());
19788
19789 SDValue NewST =
19790 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19791 ST->getPointerInfo(), ST->getAlign());
19792
19793 AddToWorklist(NewLD.getNode());
19794 AddToWorklist(NewST.getNode());
19795 WorklistRemover DeadNodes(*this);
19796 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19797 ++LdStFP2Int;
19798 return NewST;
19799 }
19800
19801 return SDValue();
19802}
19803
19804// This is a helper function for visitMUL to check the profitability
19805// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19806// MulNode is the original multiply, AddNode is (add x, c1),
19807// and ConstNode is c2.
19808//
19809// If the (add x, c1) has multiple uses, we could increase
19810// the number of adds if we make this transformation.
19811// It would only be worth doing this if we can remove a
19812// multiply in the process. Check for that here.
19813// To illustrate:
19814// (A + c1) * c3
19815// (A + c2) * c3
19816// We're checking for cases where we have common "c3 * A" expressions.
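// A concrete instance (illustrative only): with c1 = 1, c2 = 2, c3 = 10,
//   (A + 1) * 10 -> (A * 10) + 10
//   (A + 2) * 10 -> (A * 10) + 20
// so the single multiply (A * 10) can be shared by both expressions.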
19817bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19818 SDValue ConstNode) {
19819 APInt Val;
19820
19821 // If the add only has one use, and the target thinks the folding is
19822 // profitable or does not lead to worse code, this would be OK to do.
19823 if (AddNode->hasOneUse() &&
19824 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19825 return true;
19826
19827 // Walk all the users of the constant with which we're multiplying.
19828 for (SDNode *Use : ConstNode->uses()) {
19829 if (Use == MulNode) // This use is the one we're on right now. Skip it.
19830 continue;
19831
19832 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19833 SDNode *OtherOp;
19834 SDNode *MulVar = AddNode.getOperand(0).getNode();
19835
19836 // OtherOp is what we're multiplying against the constant.
19837 if (Use->getOperand(0) == ConstNode)
19838 OtherOp = Use->getOperand(1).getNode();
19839 else
19840 OtherOp = Use->getOperand(0).getNode();
19841
19842 // Check to see if multiply is with the same operand of our "add".
19843 //
19844 // ConstNode = CONST
19845 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
19846 // ...
19847 // AddNode = (A + c1) <-- MulVar is A.
19848 // = AddNode * ConstNode <-- current visiting instruction.
19849 //
19850 // If we make this transformation, we will have a common
19851 // multiply (ConstNode * A) that we can save.
19852 if (OtherOp == MulVar)
19853 return true;
19854
19855 // Now check to see if a future expansion will give us a common
19856 // multiply.
19857 //
19858 // ConstNode = CONST
19859 // AddNode = (A + c1)
19860 // ... = AddNode * ConstNode <-- current visiting instruction.
19861 // ...
19862 // OtherOp = (A + c2)
19863 // Use = OtherOp * ConstNode <-- visiting Use.
19864 //
19865 // If we make this transformation, we will have a common
19866 // multiply (CONST * A) after we also do the same transformation
19867 // to the "Use" instruction.
19868 if (OtherOp->getOpcode() == ISD::ADD &&
19869 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19870 OtherOp->getOperand(0).getNode() == MulVar)
19871 return true;
19872 }
19873 }
19874
19875 // Didn't find a case where this would be profitable.
19876 return false;
19877}
19878
19879SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19880 unsigned NumStores) {
19881 SmallVector<SDValue, 8> Chains;
19882 SmallPtrSet<const SDNode *, 8> Visited;
19883 SDLoc StoreDL(StoreNodes[0].MemNode);
19884
19885 for (unsigned i = 0; i < NumStores; ++i) {
19886 Visited.insert(StoreNodes[i].MemNode);
19887 }
19888
19889 // don't include nodes that are children or repeated nodes.
19890 for (unsigned i = 0; i < NumStores; ++i) {
19891 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19892 Chains.push_back(StoreNodes[i].MemNode->getChain());
19893 }
19894
19895 assert(!Chains.empty() && "Chain should have generated a chain");
19896 return DAG.getTokenFactor(StoreDL, Chains);
19897}
19898
19899bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19900 const Value *UnderlyingObj = nullptr;
19901 for (const auto &MemOp : StoreNodes) {
19902 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
19903 // A pseudo value such as a stack frame has its own frame index and size;
19904 // do not reuse the first store's frame index for the other frames.
19905 if (MMO->getPseudoValue())
19906 return false;
19907
19908 if (!MMO->getValue())
19909 return false;
19910
19911 const Value *Obj = getUnderlyingObject(MMO->getValue());
19912
19913 if (UnderlyingObj && UnderlyingObj != Obj)
19914 return false;
19915
19916 if (!UnderlyingObj)
19917 UnderlyingObj = Obj;
19918 }
19919
19920 return true;
19921}
19922
19923bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19924 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19925 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19926 // Make sure we have something to merge.
19927 if (NumStores < 2)
19928 return false;
19929
19930 assert((!UseTrunc || !UseVector) &&
19931 "This optimization cannot emit a vector truncating store");
19932
19933 // The latest Node in the DAG.
19934 SDLoc DL(StoreNodes[0].MemNode);
19935
19936 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19937 unsigned SizeInBits = NumStores * ElementSizeBits;
19938 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19939
19940 std::optional<MachineMemOperand::Flags> Flags;
19941 AAMDNodes AAInfo;
19942 for (unsigned I = 0; I != NumStores; ++I) {
19943 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19944 if (!Flags) {
19945 Flags = St->getMemOperand()->getFlags();
19946 AAInfo = St->getAAInfo();
19947 continue;
19948 }
19949 // Skip merging if there's an inconsistent flag.
19950 if (Flags != St->getMemOperand()->getFlags())
19951 return false;
19952 // Concatenate AA metadata.
19953 AAInfo = AAInfo.concat(St->getAAInfo());
19954 }
19955
19956 EVT StoreTy;
19957 if (UseVector) {
19958 unsigned Elts = NumStores * NumMemElts;
19959 // Get the type for the merged vector store.
19960 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19961 } else
19962 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19963
19964 SDValue StoredVal;
19965 if (UseVector) {
19966 if (IsConstantSrc) {
19967 SmallVector<SDValue, 8> BuildVector;
19968 for (unsigned I = 0; I != NumStores; ++I) {
19969 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19970 SDValue Val = St->getValue();
19971 // If constant is of the wrong type, convert it now. This comes up
19972 // when one of our stores was truncating.
19973 if (MemVT != Val.getValueType()) {
19974 Val = peekThroughBitcasts(Val);
19975 // Deal with constants of wrong size.
19976 if (ElementSizeBits != Val.getValueSizeInBits()) {
19977 auto *C = dyn_cast<ConstantSDNode>(Val);
19978 if (!C)
19979 // Not clear how to truncate FP values.
19980 // TODO: Handle truncation of build_vector constants
19981 return false;
19982
19983 EVT IntMemVT =
19984 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19985 Val = DAG.getConstant(C->getAPIntValue()
19986 .zextOrTrunc(Val.getValueSizeInBits())
19987 .zextOrTrunc(ElementSizeBits),
19988 SDLoc(C), IntMemVT);
19989 }
19990 // Make sure the value has the correctly sized type (bitcast to MemVT).
19991 Val = DAG.getBitcast(MemVT, Val);
19992 }
19993 BuildVector.push_back(Val);
19994 }
19995 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19996 : ISD::BUILD_VECTOR,
19997 DL, StoreTy, BuildVector);
19998 } else {
19999 SmallVector<SDValue, 8> Ops;
20000 for (unsigned i = 0; i < NumStores; ++i) {
20001 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20002 SDValue Val = St->getValue();
20003 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20004 // type MemVT. If the underlying value is not the correct
20005 // type, but it is an extraction of an appropriate vector we
20006 // can recast Val to be of the correct type. This may require
20007 // converting between EXTRACT_VECTOR_ELT and
20008 // EXTRACT_SUBVECTOR.
20009 if ((MemVT != Val.getValueType()) &&
20010 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20011 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20012 EVT MemVTScalarTy = MemVT.getScalarType();
20013 // We may need to add a bitcast here to get types to line up.
20014 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20015 Val = DAG.getBitcast(MemVT, Val);
20016 } else if (MemVT.isVector() &&
20017 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20018 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20019 } else {
20020 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20021 : ISD::EXTRACT_VECTOR_ELT;
20022 SDValue Vec = Val.getOperand(0);
20023 SDValue Idx = Val.getOperand(1);
20024 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20025 }
20026 }
20027 Ops.push_back(Val);
20028 }
20029
20030 // Build the extracted vector elements back into a vector.
20031 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20032 : ISD::BUILD_VECTOR,
20033 DL, StoreTy, Ops);
20034 }
20035 } else {
20036 // We should always use a vector store when merging extracted vector
20037 // elements, so this path implies a store of constants.
20038 assert(IsConstantSrc && "Merged vector elements should use vector store");
20039
20040 APInt StoreInt(SizeInBits, 0);
20041
20042 // Construct a single integer constant which is made of the smaller
20043 // constant inputs.
20044 bool IsLE = DAG.getDataLayout().isLittleEndian();
20045 for (unsigned i = 0; i < NumStores; ++i) {
20046 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20047 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20048
20049 SDValue Val = St->getValue();
20050 Val = peekThroughBitcasts(Val);
20051 StoreInt <<= ElementSizeBits;
20052 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20053 StoreInt |= C->getAPIntValue()
20054 .zextOrTrunc(ElementSizeBits)
20055 .zextOrTrunc(SizeInBits);
20056 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20057 StoreInt |= C->getValueAPF()
20058 .bitcastToAPInt()
20059 .zextOrTrunc(ElementSizeBits)
20060 .zextOrTrunc(SizeInBits);
20061 // If fp truncation is necessary give up for now.
20062 if (MemVT.getSizeInBits() != ElementSizeBits)
20063 return false;
20064 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20065 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20066 // Not yet handled
20067 return false;
20068 } else {
20069 llvm_unreachable("Invalid constant element type");
20070 }
20071 }
20072
20073 // Create the new Load and Store operations.
20074 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20075 }
20076
20077 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20078 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20079 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20080
20081 // Make sure we use a truncating store where that is required for legality.
20082 // When generating the new widened store, if the first store's pointer info
20083 // cannot be reused, keep only its address space, because the widened store
20084 // can no longer be described by the original pointer info, which covered
20085 // only the narrower memory object.
20086 SDValue NewStore;
20087 if (!UseTrunc) {
20088 NewStore = DAG.getStore(
20089 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20090 CanReusePtrInfo
20091 ? FirstInChain->getPointerInfo()
20092 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20093 FirstInChain->getAlign(), *Flags, AAInfo);
20094 } else { // Must be realized as a trunc store
20095 EVT LegalizedStoredValTy =
20096 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20097 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20098 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20099 SDValue ExtendedStoreVal =
20100 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20101 LegalizedStoredValTy);
20102 NewStore = DAG.getTruncStore(
20103 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20104 CanReusePtrInfo
20105 ? FirstInChain->getPointerInfo()
20106 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20107 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20108 AAInfo);
20109 }
20110
20111 // Replace all merged stores with the new store.
20112 for (unsigned i = 0; i < NumStores; ++i)
20113 CombineTo(StoreNodes[i].MemNode, NewStore);
20114
20115 AddToWorklist(NewChain.getNode());
20116 return true;
20117}
20118
20119void DAGCombiner::getStoreMergeCandidates(
20120 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20121 SDNode *&RootNode) {
20122 // This holds the base pointer, index, and the offset in bytes from the base
20123 // pointer. We must have a base and an offset. Do not handle stores to undef
20124 // base pointers.
20125 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20126 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20127 return;
20128
20129 SDValue Val = peekThroughBitcasts(St->getValue());
20130 StoreSource StoreSrc = getStoreSource(Val);
20131 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20132
20133 // Match on loadbaseptr if relevant.
20134 EVT MemVT = St->getMemoryVT();
20135 BaseIndexOffset LBasePtr;
20136 EVT LoadVT;
20137 if (StoreSrc == StoreSource::Load) {
20138 auto *Ld = cast<LoadSDNode>(Val);
20139 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20140 LoadVT = Ld->getMemoryVT();
20141 // Load and store should be the same type.
20142 if (MemVT != LoadVT)
20143 return;
20144 // Loads must only have one use.
20145 if (!Ld->hasNUsesOfValue(1, 0))
20146 return;
20147 // The memory operands must not be volatile/indexed/atomic.
20148 // TODO: May be able to relax for unordered atomics (see D66309)
20149 if (!Ld->isSimple() || Ld->isIndexed())
20150 return;
20151 }
20152 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20153 int64_t &Offset) -> bool {
20154 // The memory operands must not be volatile/indexed/atomic.
20155 // TODO: May be able to relax for unordered atomics (see D66309)
20156 if (!Other->isSimple() || Other->isIndexed())
20157 return false;
20158 // Don't mix temporal stores with non-temporal stores.
20159 if (St->isNonTemporal() != Other->isNonTemporal())
20160 return false;
20161 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20162 return false;
20163 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20164 // Allow merging constants of different types as integers.
20165 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20166 : Other->getMemoryVT() != MemVT;
20167 switch (StoreSrc) {
20168 case StoreSource::Load: {
20169 if (NoTypeMatch)
20170 return false;
20171 // The Load's Base Ptr must also match.
20172 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20173 if (!OtherLd)
20174 return false;
20175 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20176 if (LoadVT != OtherLd->getMemoryVT())
20177 return false;
20178 // Loads must only have one use.
20179 if (!OtherLd->hasNUsesOfValue(1, 0))
20180 return false;
20181 // The memory operands must not be volatile/indexed/atomic.
20182 // TODO: May be able to relax for unordered atomics (see D66309)
20183 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20184 return false;
20185 // Don't mix temporal loads with non-temporal loads.
20186 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20187 return false;
20188 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20189 *OtherLd))
20190 return false;
20191 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20192 return false;
20193 break;
20194 }
20195 case StoreSource::Constant:
20196 if (NoTypeMatch)
20197 return false;
20198 if (getStoreSource(OtherBC) != StoreSource::Constant)
20199 return false;
20200 break;
20201 case StoreSource::Extract:
20202 // Do not merge truncated stores here.
20203 if (Other->isTruncatingStore())
20204 return false;
20205 if (!MemVT.bitsEq(OtherBC.getValueType()))
20206 return false;
20207 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20208 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20209 return false;
20210 break;
20211 default:
20212 llvm_unreachable("Unhandled store source for merging");
20213 }
20214 Ptr = BaseIndexOffset::match(Other, DAG);
20215 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20216 };
20217
20218 // Check whether this (StoreNode, RootNode) pair has already bailed out of
20219 // the dependence check more times than the limit allows.
20220 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20221 SDNode *RootNode) -> bool {
20222 auto RootCount = StoreRootCountMap.find(StoreNode);
20223 return RootCount != StoreRootCountMap.end() &&
20224 RootCount->second.first == RootNode &&
20225 RootCount->second.second > StoreMergeDependenceLimit;
20226 };
20227
20228 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20229 // This must be a chain use.
20230 if (UseIter.getOperandNo() != 0)
20231 return;
20232 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20233 BaseIndexOffset Ptr;
20234 int64_t PtrDiff;
20235 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20236 !OverLimitInDependenceCheck(OtherStore, RootNode))
20237 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20238 }
20239 };
20240
20241 // We are looking for a root node that is an ancestor of all mergeable
20242 // stores. We search up through at most one load to our root and then down
20243 // through all of its children. For instance, we will find Store{1,2,3} if
20244 // St is Store1, Store2, or Store3 and the root is not a load, which is
20245 // always true for non-volatile ops. TODO: Expand
20246 // the search to find all valid candidates through multiple layers of loads.
20247 //
20248 // Root
20249 // |-------|-------|
20250 // Load Load Store3
20251 // | |
20252 // Store1 Store2
20253 //
20254 // FIXME: We should be able to climb and
20255 // descend TokenFactors to find candidates as well.
20256
20257 RootNode = St->getChain().getNode();
20258
20259 unsigned NumNodesExplored = 0;
20260 const unsigned MaxSearchNodes = 1024;
20261 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20262 RootNode = Ldn->getChain().getNode();
20263 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20264 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20265 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20266 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20267 TryToAddCandidate(I2);
20268 }
20269 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20270 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20271 TryToAddCandidate(I);
20272 }
20273 }
20274 } else {
20275 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20276 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20277 TryToAddCandidate(I);
20278 }
20279}
20280
20281// We need to check that merging these stores does not cause a loop in the
20282// DAG. Any store candidate may depend on another candidate indirectly through
20283// its operands. Check in parallel by searching up from operands of candidates.
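// Illustrative scenario (assumed example): if candidate store S1 is chained
// before a load L, and candidate store S2 stores a value computed from L,
// then a single merged store would depend on L through S2's value while L
// depends on S1 through the chain, so folding S1 into the merged node would
// create a cycle.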
20284bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20285 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20286 SDNode *RootNode) {
20287 // FIXME: We should be able to truncate a full search of
20288 // predecessors by doing a BFS and keeping tabs on the originating
20289 // stores from which the worklist nodes come, in a similar way to
20290 // TokenFactor simplification.
20291
20292 SmallPtrSet<const SDNode *, 32> Visited;
20293 SmallVector<const SDNode *, 8> Worklist;
20294
20295 // RootNode is a predecessor to all candidates so we need not search
20296 // past it. Add RootNode (peeking through TokenFactors). Do not count
20297 // these towards size check.
20298
20299 Worklist.push_back(RootNode);
20300 while (!Worklist.empty()) {
20301 auto N = Worklist.pop_back_val();
20302 if (!Visited.insert(N).second)
20303 continue; // Already present in Visited.
20304 if (N->getOpcode() == ISD::TokenFactor) {
20305 for (SDValue Op : N->ops())
20306 Worklist.push_back(Op.getNode());
20307 }
20308 }
20309
20310 // Don't count pruning nodes towards max.
20311 unsigned int Max = 1024 + Visited.size();
20312 // Search Ops of store candidates.
20313 for (unsigned i = 0; i < NumStores; ++i) {
20314 SDNode *N = StoreNodes[i].MemNode;
20315 // Of the 4 Store Operands:
20316 // * Chain (Op 0) -> We have already considered these
20317 // in candidate selection, but only by following the
20318 // chain dependencies. We could still have a chain
20319 // dependency to a load, that has a non-chain dep to
20320 // another load, that depends on a store, etc. So it is
20321 // possible to have dependencies that consist of a mix
20322 // of chain and non-chain deps, and we need to include
20323 // chain operands in the analysis here.
20324 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20325 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20326 // but aren't necessarily from the same base node, so
20327 // cycles possible (e.g. via indexed store).
20328 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20329 // non-indexed stores). Not constant on all targets (e.g. ARM)
20330 // and so can participate in a cycle.
20331 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20332 Worklist.push_back(N->getOperand(j).getNode());
20333 }
20334 // Search through DAG. We can stop early if we find a store node.
20335 for (unsigned i = 0; i < NumStores; ++i)
20336 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20337 Max)) {
20338 // If the search bails out, record the StoreNode and RootNode in the
20339 // StoreRootCountMap. Once we have seen the pair more times than the limit,
20340 // we won't add the StoreNode into the StoreNodes set again.
20341 if (Visited.size() >= Max) {
20342 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20343 if (RootCount.first == RootNode)
20344 RootCount.second++;
20345 else
20346 RootCount = {RootNode, 1};
20347 }
20348 return false;
20349 }
20350 return true;
20351}
20352
20353unsigned
20354DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20355 int64_t ElementSizeBytes) const {
20356 while (true) {
20357 // Find a store past the width of the first store.
20358 size_t StartIdx = 0;
20359 while ((StartIdx + 1 < StoreNodes.size()) &&
20360 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20361 StoreNodes[StartIdx + 1].OffsetFromBase)
20362 ++StartIdx;
20363
20364 // Bail if we don't have enough candidates to merge.
20365 if (StartIdx + 1 >= StoreNodes.size())
20366 return 0;
20367
20368 // Trim stores that overlapped with the first store.
20369 if (StartIdx)
20370 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20371
20372 // Scan the memory operations on the chain and find the first
20373 // non-consecutive store memory address.
20374 unsigned NumConsecutiveStores = 1;
20375 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20376 // Check that the addresses are consecutive starting from the second
20377 // element in the list of stores.
20378 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20379 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20380 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20381 break;
20382 NumConsecutiveStores = i + 1;
20383 }
20384 if (NumConsecutiveStores > 1)
20385 return NumConsecutiveStores;
20386
20387 // There are no consecutive stores at the start of the list.
20388 // Remove the first store and try again.
20389 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20390 }
20391}
20392
20393bool DAGCombiner::tryStoreMergeOfConstants(
20394 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20395 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20396 LLVMContext &Context = *DAG.getContext();
20397 const DataLayout &DL = DAG.getDataLayout();
20398 int64_t ElementSizeBytes = MemVT.getStoreSize();
20399 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20400 bool MadeChange = false;
20401
20402 // Store the constants into memory as one consecutive store.
20403 while (NumConsecutiveStores >= 2) {
20404 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20405 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20406 Align FirstStoreAlign = FirstInChain->getAlign();
20407 unsigned LastLegalType = 1;
20408 unsigned LastLegalVectorType = 1;
20409 bool LastIntegerTrunc = false;
20410 bool NonZero = false;
20411 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20412 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20413 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20414 SDValue StoredVal = ST->getValue();
20415 bool IsElementZero = false;
20416 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20417 IsElementZero = C->isZero();
20418 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20419 IsElementZero = C->getConstantFPValue()->isNullValue();
20420 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20421 IsElementZero = true;
20422 if (IsElementZero) {
20423 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20424 FirstZeroAfterNonZero = i;
20425 }
20426 NonZero |= !IsElementZero;
20427
20428 // Find a legal type for the constant store.
20429 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20430 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20431 unsigned IsFast = 0;
20432
20433 // Break early when size is too large to be legal.
20434 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20435 break;
20436
20437 if (TLI.isTypeLegal(StoreTy) &&
20438 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20439 DAG.getMachineFunction()) &&
20440 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20441 *FirstInChain->getMemOperand(), &IsFast) &&
20442 IsFast) {
20443 LastIntegerTrunc = false;
20444 LastLegalType = i + 1;
20445 // Or check whether a truncstore is legal.
20446 } else if (TLI.getTypeAction(Context, StoreTy) ==
20447 TargetLowering::TypePromoteInteger) {
20448 EVT LegalizedStoredValTy =
20449 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20450 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20451 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20452 DAG.getMachineFunction()) &&
20453 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20454 *FirstInChain->getMemOperand(), &IsFast) &&
20455 IsFast) {
20456 LastIntegerTrunc = true;
20457 LastLegalType = i + 1;
20458 }
20459 }
20460
20461 // We only use vectors if the target allows it and the function is not
20462 // marked with the noimplicitfloat attribute.
20463 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20464 AllowVectors) {
20465 // Find a legal type for the vector store.
20466 unsigned Elts = (i + 1) * NumMemElts;
20467 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20468 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20469 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20470 TLI.allowsMemoryAccess(Context, DL, Ty,
20471 *FirstInChain->getMemOperand(), &IsFast) &&
20472 IsFast)
20473 LastLegalVectorType = i + 1;
20474 }
20475 }
20476
20477 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20478 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20479 bool UseTrunc = LastIntegerTrunc && !UseVector;
20480
20481 // Check if we found a legal integer type that creates a meaningful
20482 // merge.
20483 if (NumElem < 2) {
20484 // We know that candidate stores are in order and of correct
20485 // shape. While there is no mergeable sequence from the
20486 // beginning one may start later in the sequence. The only
20487 // reason a merge of size N could have failed where another of
20488 // the same size would not have, is if the alignment has
20489 // improved or we've dropped a non-zero value. Drop as many
20490 // candidates as we can here.
20491 unsigned NumSkip = 1;
20492 while ((NumSkip < NumConsecutiveStores) &&
20493 (NumSkip < FirstZeroAfterNonZero) &&
20494 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20495 NumSkip++;
20496
20497 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20498 NumConsecutiveStores -= NumSkip;
20499 continue;
20500 }
20501
20502 // Check that we can merge these candidates without causing a cycle.
20503 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20504 RootNode)) {
20505 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20506 NumConsecutiveStores -= NumElem;
20507 continue;
20508 }
20509
20510 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20511 /*IsConstantSrc*/ true,
20512 UseVector, UseTrunc);
20513
20514 // Remove merged stores for next iteration.
20515 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20516 NumConsecutiveStores -= NumElem;
20517 }
20518 return MadeChange;
20519}
20520
20521bool DAGCombiner::tryStoreMergeOfExtracts(
20522 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20523 EVT MemVT, SDNode *RootNode) {
20524 LLVMContext &Context = *DAG.getContext();
20525 const DataLayout &DL = DAG.getDataLayout();
20526 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20527 bool MadeChange = false;
20528
20529 // Loop on Consecutive Stores on success.
20530 while (NumConsecutiveStores >= 2) {
20531 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20532 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20533 Align FirstStoreAlign = FirstInChain->getAlign();
20534 unsigned NumStoresToMerge = 1;
20535 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20536 // Find a legal type for the vector store.
20537 unsigned Elts = (i + 1) * NumMemElts;
20538 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20539 unsigned IsFast = 0;
20540
20541 // Break early when size is too large to be legal.
20542 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20543 break;
20544
20545 if (TLI.isTypeLegal(Ty) &&
20546 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20547 TLI.allowsMemoryAccess(Context, DL, Ty,
20548 *FirstInChain->getMemOperand(), &IsFast) &&
20549 IsFast)
20550 NumStoresToMerge = i + 1;
20551 }
20552
20553 // Check if we found a legal integer type creating a meaningful
20554 // merge.
20555 if (NumStoresToMerge < 2) {
20556 // We know that candidate stores are in order and of correct
20557 // shape. While there is no mergeable sequence from the
20558 // beginning one may start later in the sequence. The only
20559 // reason a merge of size N could have failed where another of
20560 // the same size would not have, is if the alignment has
20561 // improved. Drop as many candidates as we can here.
20562 unsigned NumSkip = 1;
20563 while ((NumSkip < NumConsecutiveStores) &&
20564 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20565 NumSkip++;
20566
20567 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20568 NumConsecutiveStores -= NumSkip;
20569 continue;
20570 }
20571
20572 // Check that we can merge these candidates without causing a cycle.
20573 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20574 RootNode)) {
20575 StoreNodes.erase(StoreNodes.begin(),
20576 StoreNodes.begin() + NumStoresToMerge);
20577 NumConsecutiveStores -= NumStoresToMerge;
20578 continue;
20579 }
20580
20581 MadeChange |= mergeStoresOfConstantsOrVecElts(
20582 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20583 /*UseVector*/ true, /*UseTrunc*/ false);
20584
20585 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20586 NumConsecutiveStores -= NumStoresToMerge;
20587 }
20588 return MadeChange;
20589}
20590
20591bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20592 unsigned NumConsecutiveStores, EVT MemVT,
20593 SDNode *RootNode, bool AllowVectors,
20594 bool IsNonTemporalStore,
20595 bool IsNonTemporalLoad) {
20596 LLVMContext &Context = *DAG.getContext();
20597 const DataLayout &DL = DAG.getDataLayout();
20598 int64_t ElementSizeBytes = MemVT.getStoreSize();
20599 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20600 bool MadeChange = false;
20601
20602 // Look for load nodes which are used by the stored values.
20603 SmallVector<MemOpLink, 8> LoadNodes;
20604
20605 // Find acceptable loads. Loads need to have the same chain (token factor),
20606 // must not be zext, volatile, indexed, and they must be consecutive.
20607 BaseIndexOffset LdBasePtr;
20608
20609 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20610 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20611 SDValue Val = peekThroughBitcasts(St->getValue());
20612 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20613
20614 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20615 // If this is not the first ptr that we check.
20616 int64_t LdOffset = 0;
20617 if (LdBasePtr.getBase().getNode()) {
20618 // The base ptr must be the same.
20619 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20620 break;
20621 } else {
20622 // Check that all other base pointers are the same as this one.
20623 LdBasePtr = LdPtr;
20624 }
20625
20626 // We found a potential memory operand to merge.
20627 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20628 }
20629
20630 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20631 Align RequiredAlignment;
20632 bool NeedRotate = false;
20633 if (LoadNodes.size() == 2) {
20634 // If we have load/store pair instructions and we only have two values,
20635 // don't bother merging.
20636 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20637 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20638 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20639 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20640 break;
20641 }
20642 // If the loads are reversed, see if we can rotate the halves into place.
20643 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20644 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20645 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20646 if (Offset0 - Offset1 == ElementSizeBytes &&
20647 (hasOperation(ISD::ROTL, PairVT) ||
20648 hasOperation(ISD::ROTR, PairVT))) {
20649 std::swap(LoadNodes[0], LoadNodes[1]);
20650 NeedRotate = true;
20651 }
20652 }
20653 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20654 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20655 Align FirstStoreAlign = FirstInChain->getAlign();
20656 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20657
20658 // Scan the memory operations on the chain and find the first
20659 // non-consecutive load memory address. These variables hold the index in
20660 // the store node array.
20661
20662 unsigned LastConsecutiveLoad = 1;
20663
20664 // This variable refers to the size and not index in the array.
20665 unsigned LastLegalVectorType = 1;
20666 unsigned LastLegalIntegerType = 1;
20667 bool isDereferenceable = true;
20668 bool DoIntegerTruncate = false;
20669 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20670 SDValue LoadChain = FirstLoad->getChain();
20671 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20672 // All loads must share the same chain.
20673 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20674 break;
20675
20676 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20677 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20678 break;
20679 LastConsecutiveLoad = i;
20680
20681 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20682 isDereferenceable = false;
20683
20684 // Find a legal type for the vector store.
20685 unsigned Elts = (i + 1) * NumMemElts;
20686 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20687
20688 // Break early when size is too large to be legal.
20689 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20690 break;
20691
20692 unsigned IsFastSt = 0;
20693 unsigned IsFastLd = 0;
20694 // Don't try vector types if we need a rotate. We may still fail the
20695 // legality checks for the integer type, but we can't handle the rotate
20696 // case with vectors.
20697 // FIXME: We could use a shuffle in place of the rotate.
20698 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20699 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20700 DAG.getMachineFunction()) &&
20701 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20702 *FirstInChain->getMemOperand(), &IsFastSt) &&
20703 IsFastSt &&
20704 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20705 *FirstLoad->getMemOperand(), &IsFastLd) &&
20706 IsFastLd) {
20707 LastLegalVectorType = i + 1;
20708 }
20709
20710 // Find a legal type for the integer store.
20711 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20712 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20713 if (TLI.isTypeLegal(StoreTy) &&
20714 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20715 DAG.getMachineFunction()) &&
20716 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20717 *FirstInChain->getMemOperand(), &IsFastSt) &&
20718 IsFastSt &&
20719 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20720 *FirstLoad->getMemOperand(), &IsFastLd) &&
20721 IsFastLd) {
20722 LastLegalIntegerType = i + 1;
20723 DoIntegerTruncate = false;
20724 // Or check whether a truncstore and extload is legal.
20725 } else if (TLI.getTypeAction(Context, StoreTy) ==
20726 TargetLowering::TypePromoteInteger) {
20727 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20728 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20729 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20730 DAG.getMachineFunction()) &&
20731 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20732 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20733 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20734 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20735 *FirstInChain->getMemOperand(), &IsFastSt) &&
20736 IsFastSt &&
20737 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20738 *FirstLoad->getMemOperand(), &IsFastLd) &&
20739 IsFastLd) {
20740 LastLegalIntegerType = i + 1;
20741 DoIntegerTruncate = true;
20742 }
20743 }
20744 }
20745
20746 // Only use vector types if the vector type is larger than the integer
20747 // type. If they are the same, use integers.
20748 bool UseVectorTy =
20749 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20750 unsigned LastLegalType =
20751 std::max(LastLegalVectorType, LastLegalIntegerType);
20752
20753 // We add +1 here because the LastXXX variables refer to a position (index)
20754 // while NumElem refers to a count (array size).
20755 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20756 NumElem = std::min(LastLegalType, NumElem);
20757 Align FirstLoadAlign = FirstLoad->getAlign();
20758
20759 if (NumElem < 2) {
20760 // We know that candidate stores are in order and of correct
20761 // shape. While there is no mergeable sequence from the
20762 // beginning one may start later in the sequence. The only
20763 // reason a merge of size N could have failed where another of
20764 // the same size would not have is if the alignment or either
20765 // the load or store has improved. Drop as many candidates as we
20766 // can here.
20767 unsigned NumSkip = 1;
20768 while ((NumSkip < LoadNodes.size()) &&
20769 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20770 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20771 NumSkip++;
20772 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20773 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20774 NumConsecutiveStores -= NumSkip;
20775 continue;
20776 }
20777
20778 // Check that we can merge these candidates without causing a cycle.
20779 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20780 RootNode)) {
20781 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20782 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20783 NumConsecutiveStores -= NumElem;
20784 continue;
20785 }
20786
20787 // Find if it is better to use vectors or integers to load and store
20788 // to memory.
20789 EVT JointMemOpVT;
20790 if (UseVectorTy) {
20791 // Find a legal type for the vector store.
20792 unsigned Elts = NumElem * NumMemElts;
20793 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20794 } else {
20795 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20796 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20797 }
20798
20799 SDLoc LoadDL(LoadNodes[0].MemNode);
20800 SDLoc StoreDL(StoreNodes[0].MemNode);
20801
20802 // The merged loads are required to have the same incoming chain, so
20803 // using the first's chain is acceptable.
20804
20805 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20806 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20807 AddToWorklist(NewStoreChain.getNode());
20808
20809 MachineMemOperand::Flags LdMMOFlags =
20810 isDereferenceable ? MachineMemOperand::MODereferenceable
20811 : MachineMemOperand::MONone;
20812 if (IsNonTemporalLoad)
20813 LdMMOFlags |= MachineMemOperand::MONonTemporal;
20814
20815 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20816
20817 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20818 ? MachineMemOperand::MONonTemporal
20819 : MachineMemOperand::MONone;
20820
20821 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20822
20823 SDValue NewLoad, NewStore;
20824 if (UseVectorTy || !DoIntegerTruncate) {
20825 NewLoad = DAG.getLoad(
20826 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20827 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20828 SDValue StoreOp = NewLoad;
20829 if (NeedRotate) {
20830 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20831 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20832 "Unexpected type for rotate-able load pair");
20833 SDValue RotAmt =
20834 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20835 // Target can convert to the identical ROTR if it does not have ROTL.
20836 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20837 }
20838 NewStore = DAG.getStore(
20839 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20840 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20841 : MachinePointerInfo(FirstStoreAS),
20842 FirstStoreAlign, StMMOFlags);
20843 } else { // This must be the truncstore/extload case
20844 EVT ExtendedTy =
20845 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20846 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20847 FirstLoad->getChain(), FirstLoad->getBasePtr(),
20848 FirstLoad->getPointerInfo(), JointMemOpVT,
20849 FirstLoadAlign, LdMMOFlags);
20850 NewStore = DAG.getTruncStore(
20851 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20852 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20853 : MachinePointerInfo(FirstStoreAS),
20854 JointMemOpVT, FirstInChain->getAlign(),
20855 FirstInChain->getMemOperand()->getFlags());
20856 }
20857
20858 // Transfer chain users from old loads to the new load.
20859 for (unsigned i = 0; i < NumElem; ++i) {
20860 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20861 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20862 SDValue(NewLoad.getNode(), 1));
20863 }
20864
20865 // Replace all stores with the new store. Recursively remove corresponding
20866 // values if they are no longer used.
20867 for (unsigned i = 0; i < NumElem; ++i) {
20868 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20869 CombineTo(StoreNodes[i].MemNode, NewStore);
20870 if (Val->use_empty())
20871 recursivelyDeleteUnusedNodes(Val.getNode());
20872 }
20873
20874 MadeChange = true;
20875 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20876 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20877 NumConsecutiveStores -= NumElem;
20878 }
20879 return MadeChange;
20880}
20881
20882bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20883 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20884 return false;
20885
20886 // TODO: Extend this function to merge stores of scalable vectors.
20887 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20888 // store since we know <vscale x 16 x i8> is exactly twice as large as
20889 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20890 EVT MemVT = St->getMemoryVT();
20891 if (MemVT.isScalableVT())
20892 return false;
20893 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20894 return false;
20895
20896 // This function cannot currently deal with non-byte-sized memory sizes.
20897 int64_t ElementSizeBytes = MemVT.getStoreSize();
20898 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20899 return false;
20900
20901 // Do not bother looking at stored values that are not constants, loads, or
20902 // extracted vector elements.
20903 SDValue StoredVal = peekThroughBitcasts(St->getValue());
20904 const StoreSource StoreSrc = getStoreSource(StoredVal);
20905 if (StoreSrc == StoreSource::Unknown)
20906 return false;
20907
20908 SmallVector<MemOpLink, 8> StoreNodes;
20909 SDNode *RootNode;
20910 // Find potential store merge candidates by searching through chain sub-DAG
20911 getStoreMergeCandidates(St, StoreNodes, RootNode);
20912
20913 // Check if there is anything to merge.
20914 if (StoreNodes.size() < 2)
20915 return false;
20916
20917 // Sort the memory operands according to their distance from the
20918 // base pointer.
20919 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20920 return LHS.OffsetFromBase < RHS.OffsetFromBase;
20921 });
20922
20923 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20924 Attribute::NoImplicitFloat);
20925 bool IsNonTemporalStore = St->isNonTemporal();
20926 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20927 cast<LoadSDNode>(StoredVal)->isNonTemporal();
20928
20929 // Store Merge attempts to merge the lowest stores. This generally
20930 // works out well when it succeeds, as the remaining stores are checked
20931 // after the first collection of stores is merged. However, in the
20932 // case that a non-mergeable store is found first, e.g., {p[-2],
20933 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
20934 // mergeable cases. To prevent this, we prune such stores from the
20935 // front of StoreNodes here.
20936 bool MadeChange = false;
20937 while (StoreNodes.size() > 1) {
20938 unsigned NumConsecutiveStores =
20939 getConsecutiveStores(StoreNodes, ElementSizeBytes);
20940 // There are no more stores in the list to examine.
20941 if (NumConsecutiveStores == 0)
20942 return MadeChange;
20943
20944 // We have at least 2 consecutive stores. Try to merge them.
20945 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20946 switch (StoreSrc) {
20947 case StoreSource::Constant:
20948 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20949 MemVT, RootNode, AllowVectors);
20950 break;
20951
20952 case StoreSource::Extract:
20953 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20954 MemVT, RootNode);
20955 break;
20956
20957 case StoreSource::Load:
20958 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20959 MemVT, RootNode, AllowVectors,
20960 IsNonTemporalStore, IsNonTemporalLoad);
20961 break;
20962
20963 default:
20964 llvm_unreachable("Unhandled store source type");
20965 }
20966 }
20967 return MadeChange;
20968}
20969
20970SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20971 SDLoc SL(ST);
20972 SDValue ReplStore;
20973
20974 // Replace the chain to avoid dependency.
20975 if (ST->isTruncatingStore()) {
20976 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20977 ST->getBasePtr(), ST->getMemoryVT(),
20978 ST->getMemOperand());
20979 } else {
20980 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20981 ST->getMemOperand());
20982 }
20983
20984 // Create token to keep both nodes around.
20985 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
20986 MVT::Other, ST->getChain(), ReplStore);
20987
20988 // Make sure the new and old chains are cleaned up.
20989 AddToWorklist(Token.getNode());
20990
20991 // Don't add users to work list.
20992 return CombineTo(ST, Token, false);
20993}
20994
20995SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
20996 SDValue Value = ST->getValue();
20997 if (Value.getOpcode() == ISD::TargetConstantFP)
20998 return SDValue();
20999
21000 if (!ISD::isNormalStore(ST))
21001 return SDValue();
21002
21003 SDLoc DL(ST);
21004
21005 SDValue Chain = ST->getChain();
21006 SDValue Ptr = ST->getBasePtr();
21007
21008 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21009
21010 // NOTE: If the original store is volatile, this transform must not increase
21011 // the number of stores. For example, on x86-32 an f64 can be stored in one
21012 // processor operation but an i64 (which is not legal) requires two. So the
21013 // transform should not be done in this case.
21014
21015 SDValue Tmp;
21016 switch (CFP->getSimpleValueType(0).SimpleTy) {
21017 default:
21018 llvm_unreachable("Unknown FP type");
21019 case MVT::f16: // We don't do this for these yet.
21020 case MVT::bf16:
21021 case MVT::f80:
21022 case MVT::f128:
21023 case MVT::ppcf128:
21024 return SDValue();
21025 case MVT::f32:
21026 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21027 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21028 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21029 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21030 MVT::i32);
21031 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21032 }
21033
21034 return SDValue();
21035 case MVT::f64:
21036 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21037 ST->isSimple()) ||
21038 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21039 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21040 getZExtValue(), SDLoc(CFP), MVT::i64);
21041 return DAG.getStore(Chain, DL, Tmp,
21042 Ptr, ST->getMemOperand());
21043 }
21044
21045 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21046 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21047 // Many FP stores are not made apparent until after legalize, e.g. for
21048 // argument passing. Since this is so common, custom legalize the
21049 // 64-bit integer store into two 32-bit stores.
21050 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21051 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21052 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21053 if (DAG.getDataLayout().isBigEndian())
21054 std::swap(Lo, Hi);
21055
21056 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21057 AAMDNodes AAInfo = ST->getAAInfo();
21058
21059 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21060 ST->getOriginalAlign(), MMOFlags, AAInfo);
21061 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21062 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21063 ST->getPointerInfo().getWithOffset(4),
21064 ST->getOriginalAlign(), MMOFlags, AAInfo);
21065 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21066 St0, St1);
21067 }
21068
21069 return SDValue();
21070 }
21071}
21072
21073// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21074//
21075// If a store of a load with an element inserted into it has no other
21076// uses in between the chain, then we can consider the vector store
21077// dead and replace it with just the single scalar element store.
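// Illustrative sketch, not part of the original source: for
//   (store (insert_vector_elt (load <4 x i32> p), x, 2), p)
// the whole vector store can be replaced by a single i32 store of x at
// p+8 (element index 2 times 4 bytes), provided the loaded vector has no
// other uses and the chain is free of intervening side effects.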
21078SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21079 SDLoc DL(ST);
21080 SDValue Value = ST->getValue();
21081 SDValue Ptr = ST->getBasePtr();
21082 SDValue Chain = ST->getChain();
21083 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21084 return SDValue();
21085
21086 SDValue Elt = Value.getOperand(1);
21087 SDValue Idx = Value.getOperand(2);
21088
21089 // If the element isn't byte sized or is implicitly truncated then we can't
21090 // compute an offset.
21091 EVT EltVT = Elt.getValueType();
21092 if (!EltVT.isByteSized() ||
21093 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21094 return SDValue();
21095
21096 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21097 if (!Ld || Ld->getBasePtr() != Ptr ||
21098 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21099 !ISD::isNormalStore(ST) ||
21100 Ld->getAddressSpace() != ST->getAddressSpace() ||
21101 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21102 return SDValue();
21103
21104 unsigned IsFast;
21105 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21106 Elt.getValueType(), ST->getAddressSpace(),
21107 ST->getAlign(), ST->getMemOperand()->getFlags(),
21108 &IsFast) ||
21109 !IsFast)
21110 return SDValue();
21111
21112 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21113
21114 // If the offset is a known constant then try to recover the pointer
21115 // info
21116 SDValue NewPtr;
21117 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21118 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21119 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21120 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21121 } else {
21122 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21123 }
21124
21125 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21126 ST->getMemOperand()->getFlags());
21127}
21128
21129SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21130 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21131 SDValue Val = ST->getVal();
21132 EVT VT = Val.getValueType();
21133 EVT MemVT = ST->getMemoryVT();
21134
21135 if (MemVT.bitsLT(VT)) { // Is truncating store
21136 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21137 MemVT.getScalarSizeInBits());
21138 // See if we can simplify the operation with SimplifyDemandedBits, which
21139 // only works if the value has a single use.
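// Illustrative sketch, not part of the original source: for an i32 value
// stored into i16 memory, only the low 16 bits are demanded, so a value
// such as (and x, 0xFFFF) can be simplified to just x before the store.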
21140 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21141 return SDValue(N, 0);
21142 }
21143
21144 return SDValue();
21145}
21146
21147SDValue DAGCombiner::visitSTORE(SDNode *N) {
21148 StoreSDNode *ST = cast<StoreSDNode>(N);
21149 SDValue Chain = ST->getChain();
21150 SDValue Value = ST->getValue();
21151 SDValue Ptr = ST->getBasePtr();
21152
21153 // If this is a store of a bit convert, store the input value if the
21154 // resultant store does not need a higher alignment than the original.
21155 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21156 ST->isUnindexed()) {
21157 EVT SVT = Value.getOperand(0).getValueType();
21158 // If the store is volatile, we only want to change the store type if the
21159 // resulting store is legal. Otherwise we might increase the number of
21160 // memory accesses. We don't care if the original type was legal or not
21161 // as we assume software couldn't rely on the number of accesses of an
21162 // illegal type.
21163 // TODO: May be able to relax for unordered atomics (see D66309)
21164 if (((!LegalOperations && ST->isSimple()) ||
21165 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21166 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21167 DAG, *ST->getMemOperand())) {
21168 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21169 ST->getMemOperand());
21170 }
21171 }
21172
21173 // Turn 'store undef, Ptr' -> nothing.
21174 if (Value.isUndef() && ST->isUnindexed())
21175 return Chain;
21176
21177 // Try to infer better alignment information than the store already has.
21178 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21179 !ST->isAtomic()) {
21180 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21181 if (*Alignment > ST->getAlign() &&
21182 isAligned(*Alignment, ST->getSrcValueOffset())) {
21183 SDValue NewStore =
21184 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21185 ST->getMemoryVT(), *Alignment,
21186 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21187 // NewStore will always be N as we are only refining the alignment
21188 assert(NewStore.getNode() == N);
21189 (void)NewStore;
21190 }
21191 }
21192 }
21193
21194 // Try transforming a pair floating point load / store ops to integer
21195 // load / store ops.
21196 if (SDValue NewST = TransformFPLoadStorePair(N))
21197 return NewST;
21198
21199 // Try transforming several stores into STORE (BSWAP).
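// Illustrative sketch, not part of the original source: on a little-endian
// target, i8 truncstores of (x >> 24), (x >> 16), (x >> 8) and x to p,
// p+1, p+2 and p+3 together describe (store i32 (bswap x), p).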
21200 if (SDValue Store = mergeTruncStores(ST))
21201 return Store;
21202
21203 if (ST->isUnindexed()) {
21204 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21205 // adjacent stores.
21206 if (findBetterNeighborChains(ST)) {
21207 // replaceStoreChain uses CombineTo, which handled all of the worklist
21208 // manipulation. Return the original node to not do anything else.
21209 return SDValue(ST, 0);
21210 }
21211 Chain = ST->getChain();
21212 }
21213
21214 // FIXME: is there such a thing as a truncating indexed store?
21215 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21216 Value.getValueType().isInteger() &&
21217 (!isa<ConstantSDNode>(Value) ||
21218 !cast<ConstantSDNode>(Value)->isOpaque())) {
21219 // Convert a truncating store of an extension into a standard store.
21220 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21221 Value.getOpcode() == ISD::SIGN_EXTEND ||
21222 Value.getOpcode() == ISD::ANY_EXTEND) &&
21223 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21224 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21225 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21226 ST->getMemOperand());
21227
21228 APInt TruncDemandedBits =
21229 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21230 ST->getMemoryVT().getScalarSizeInBits());
21231
21232 // See if we can simplify the operation with SimplifyDemandedBits, which
21233 // only works if the value has a single use.
21234 AddToWorklist(Value.getNode());
21235 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21236 // Re-visit the store if anything changed and the store hasn't been merged
21237 // with another node (N is deleted); SimplifyDemandedBits will add Value's
21238 // node back to the worklist if necessary, but we also need to re-visit
21239 // the Store node itself.
21240 if (N->getOpcode() != ISD::DELETED_NODE)
21241 AddToWorklist(N);
21242 return SDValue(N, 0);
21243 }
21244
21245 // Otherwise, see if we can simplify the input to this truncstore with
21246 // knowledge that only the low bits are being used. For example:
21247 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21248 if (SDValue Shorter =
21249 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21250 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21251 ST->getMemOperand());
21252
21253 // If we're storing a truncated constant, see if we can simplify it.
21254 // TODO: Move this to targetShrinkDemandedConstant?
21255 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21256 if (!Cst->isOpaque()) {
21257 const APInt &CValue = Cst->getAPIntValue();
21258 APInt NewVal = CValue & TruncDemandedBits;
21259 if (NewVal != CValue) {
21260 SDValue Shorter =
21261 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21262 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21263 ST->getMemoryVT(), ST->getMemOperand());
21264 }
21265 }
21266 }
21267
21268 // If this is a load followed by a store to the same location, then the store
21269 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21270 // TODO: Add big-endian truncate support with test coverage.
21271 // TODO: Can relax for unordered atomics (see D66309)
21272 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21274 : Value;
21275 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21276 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21277 ST->isUnindexed() && ST->isSimple() &&
21278 Ld->getAddressSpace() == ST->getAddressSpace() &&
21279 // There can't be any side effects between the load and store, such as
21280 // a call or store.
21281 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21282 // The store is dead, remove it.
21283 return Chain;
21284 }
21285 }
21286
21287 // Try scalarizing vector stores of loads where we only change one element
21288 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21289 return NewST;
21290
21291 // TODO: Can relax for unordered atomics (see D66309)
21292 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21293 if (ST->isUnindexed() && ST->isSimple() &&
21294 ST1->isUnindexed() && ST1->isSimple()) {
21295 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21296 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21297 ST->getAddressSpace() == ST1->getAddressSpace()) {
21298 // If this is a store followed by a store with the same value to the
21299 // same location, then the store is dead/noop.
21300 return Chain;
21301 }
21302
21303 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21304 !ST1->getBasePtr().isUndef() &&
21305 ST->getAddressSpace() == ST1->getAddressSpace()) {
21306 // If one of the two stores has a scalable vector type and the other a
21307 // fixed-size type, we cannot simply compare their sizes: a scalable
21308 // store's final size is unknown until runtime, so only remove the
21309 // preceding store when its size is known to fit within this one.
21310 if (ST->getMemoryVT().isScalableVector() ||
21311 ST1->getMemoryVT().isScalableVector()) {
21312 if (ST1->getBasePtr() == Ptr &&
21313 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21314 ST->getMemoryVT().getStoreSize())) {
21315 CombineTo(ST1, ST1->getChain());
21316 return SDValue(N, 0);
21317 }
21318 } else {
21319 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21320 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21321 // If the preceding store writes to a subset of this store's location
21322 // and no other node is chained to that store, we can effectively drop
21323 // the preceding store. Do not remove stores to undef as they may be
21324 // used as data sinks.
21325 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21326 ChainBase,
21327 ST1->getMemoryVT().getFixedSizeInBits())) {
21328 CombineTo(ST1, ST1->getChain());
21329 return SDValue(N, 0);
21330 }
21331 }
21332 }
21333 }
21334 }
21335
21336 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21337 // truncating store. We can do this even if this is already a truncstore.
21338 if ((Value.getOpcode() == ISD::FP_ROUND ||
21339 Value.getOpcode() == ISD::TRUNCATE) &&
21340 Value->hasOneUse() && ST->isUnindexed() &&
21341 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21342 ST->getMemoryVT(), LegalOperations)) {
21343 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21344 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21345 }
21346
21347 // Always perform this optimization before types are legal. If the target
21348 // prefers, also try this after legalization to catch stores that were created
21349 // by intrinsics or other nodes.
21350 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21351 while (true) {
21352 // There can be multiple store sequences on the same chain.
21353 // Keep trying to merge store sequences until we are unable to do so
21354 // or until we merge the last store on the chain.
21355 bool Changed = mergeConsecutiveStores(ST);
21356 if (!Changed) break;
21357 // Return N as merge only uses CombineTo and no worklist clean
21358 // up is necessary.
21359 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21360 return SDValue(N, 0);
21361 }
21362 }
21363
21364 // Try transforming N to an indexed store.
21365 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21366 return SDValue(N, 0);
21367
21368 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21369 //
21370 // Make sure to do this only after attempting to merge stores in order to
21371 // avoid changing the types of some subset of stores due to visit order,
21372 // preventing their merging.
21373 if (isa<ConstantFPSDNode>(ST->getValue())) {
21374 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21375 return NewSt;
21376 }
21377
21378 if (SDValue NewSt = splitMergedValStore(ST))
21379 return NewSt;
21380
21381 return ReduceLoadOpStoreWidth(N);
21382}
21383
21384SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21385 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21386 if (!LifetimeEnd->hasOffset())
21387 return SDValue();
21388
21389 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21390 LifetimeEnd->getOffset(), false);
21391
21392 // We walk up the chains to find stores.
21393 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21394 while (!Chains.empty()) {
21395 SDValue Chain = Chains.pop_back_val();
21396 if (!Chain.hasOneUse())
21397 continue;
21398 switch (Chain.getOpcode()) {
21399 case ISD::TokenFactor:
21400 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21401 Chains.push_back(Chain.getOperand(--Nops));
21402 break;
21403 case ISD::LIFETIME_START:
21404 case ISD::LIFETIME_END:
21405 // We can forward past any lifetime start/end that can be proven not to
21406 // alias the node.
21407 if (!mayAlias(Chain.getNode(), N))
21408 Chains.push_back(Chain.getOperand(0));
21409 break;
21410 case ISD::STORE: {
21411 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21412 // TODO: Can relax for unordered atomics (see D66309)
21413 if (!ST->isSimple() || ST->isIndexed())
21414 continue;
21415 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21416 // The bounds of a scalable store are not known until runtime, so this
21417 // store cannot be elided.
21418 if (StoreSize.isScalable())
21419 continue;
21420 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21421 // If we store purely within object bounds just before its lifetime ends,
21422 // we can remove the store.
21423 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21424 StoreSize.getFixedValue() * 8)) {
21425 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21426 dbgs() << "\nwithin LIFETIME_END of : ";
21427 LifetimeEndBase.dump(); dbgs() << "\n");
21428 CombineTo(ST, ST->getChain());
21429 return SDValue(N, 0);
21430 }
21431 }
21432 }
21433 }
21434 return SDValue();
21435}
21436
21437/// For the instruction sequence of store below, F and I values
21438/// are bundled together as an i64 value before being stored into memory.
21439 /// Sometimes it is more efficient to generate separate stores for F and I,
21440/// which can remove the bitwise instructions or sink them to colder places.
21441///
21442/// (store (or (zext (bitcast F to i32) to i64),
21443/// (shl (zext I to i64), 32)), addr) -->
21444/// (store F, addr) and (store I, addr+4)
21445///
21446/// Similarly, splitting for other merged store can also be beneficial, like:
21447/// For pair of {i32, i32}, i64 store --> two i32 stores.
21448/// For pair of {i32, i16}, i64 store --> two i32 stores.
21449/// For pair of {i16, i16}, i32 store --> two i16 stores.
21450/// For pair of {i16, i8}, i32 store --> two i16 stores.
21451/// For pair of {i8, i8}, i16 store --> two i8 stores.
21452///
21453/// We allow each target to determine specifically which kind of splitting is
21454/// supported.
21455///
21456/// The store patterns are commonly seen from the simple code snippet below
21457 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21458/// void goo(const std::pair<int, float> &);
21459/// hoo() {
21460/// ...
21461/// goo(std::make_pair(tmp, ftmp));
21462/// ...
21463/// }
21464///
21465SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21466 if (OptLevel == CodeGenOptLevel::None)
21467 return SDValue();
21468
21469 // Can't change the number of memory accesses for a volatile store or break
21470 // atomicity for an atomic one.
21471 if (!ST->isSimple())
21472 return SDValue();
21473
21474 SDValue Val = ST->getValue();
21475 SDLoc DL(ST);
21476
21477 // Match OR operand.
21478 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21479 return SDValue();
21480
21481 // Match SHL operand and get Lower and Higher parts of Val.
21482 SDValue Op1 = Val.getOperand(0);
21483 SDValue Op2 = Val.getOperand(1);
21484 SDValue Lo, Hi;
21485 if (Op1.getOpcode() != ISD::SHL) {
21486 std::swap(Op1, Op2);
21487 if (Op1.getOpcode() != ISD::SHL)
21488 return SDValue();
21489 }
21490 Lo = Op2;
21491 Hi = Op1.getOperand(0);
21492 if (!Op1.hasOneUse())
21493 return SDValue();
21494
21495 // Match shift amount to HalfValBitSize.
21496 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21497 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21498 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21499 return SDValue();
21500
21501 // Lo and Hi are zero-extended from int with size less equal than 32
21502 // to i64.
21503 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21504 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21505 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21506 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21507 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21508 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21509 return SDValue();
21510
21511 // Use the EVT of low and high parts before bitcast as the input
21512 // of target query.
21513 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21514 ? Lo.getOperand(0).getValueType()
21515 : Lo.getValueType();
21516 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21517 ? Hi.getOperand(0).getValueType()
21518 : Hi.getValueType();
21519 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21520 return SDValue();
21521
21522 // Start to split store.
21523 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21524 AAMDNodes AAInfo = ST->getAAInfo();
21525
21526 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21527 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21528 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21529 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21530
21531 SDValue Chain = ST->getChain();
21532 SDValue Ptr = ST->getBasePtr();
21533 // Lower value store.
21534 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21535 ST->getOriginalAlign(), MMOFlags, AAInfo);
21536 Ptr =
21537 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21538 // Higher value store.
21539 SDValue St1 = DAG.getStore(
21540 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21541 ST->getOriginalAlign(), MMOFlags, AAInfo);
21542 return St1;
21543}
21544
21545// Merge an insertion into an existing shuffle:
21546// (insert_vector_elt (vector_shuffle X, Y, Mask),
21547 // (extract_vector_elt X, N), InsIndex)
21548// --> (vector_shuffle X, Y, NewMask)
21549// and variations where shuffle operands may be CONCAT_VECTORS.
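// Illustrative sketch, not part of the original source: with v4i32 inputs
// X and Y and Mask = <0,4,1,5>, inserting (extract_vector_elt X, 3) at
// index 1 only rewrites the mask to <0,3,1,5>; no insert_vector_elt node
// is needed.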
21550 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21551 SmallVectorImpl<int> &NewMask, SDValue Elt,
21552 unsigned InsIndex) {
21553 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21554 !isa<ConstantSDNode>(Elt.getOperand(1)))
21555 return false;
21556
21557 // Vec's operand 0 is using indices from 0 to N-1 and
21558 // operand 1 from N to 2N - 1, where N is the number of
21559 // elements in the vectors.
21560 SDValue InsertVal0 = Elt.getOperand(0);
21561 int ElementOffset = -1;
21562
21563 // We explore the inputs of the shuffle in order to see if we find the
21564 // source of the extract_vector_elt. If so, we can use it to modify the
21565 // shuffle rather than perform an insert_vector_elt.
21566 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
21567 ArgWorkList.emplace_back(Mask.size(), Y);
21568 ArgWorkList.emplace_back(0, X);
21569
21570 while (!ArgWorkList.empty()) {
21571 int ArgOffset;
21572 SDValue ArgVal;
21573 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21574
21575 if (ArgVal == InsertVal0) {
21576 ElementOffset = ArgOffset;
21577 break;
21578 }
21579
21580 // Peek through concat_vector.
21581 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21582 int CurrentArgOffset =
21583 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21584 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21585 for (SDValue Op : reverse(ArgVal->ops())) {
21586 CurrentArgOffset -= Step;
21587 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21588 }
21589
21590 // Make sure we went through all the elements and did not screw up index
21591 // computation.
21592 assert(CurrentArgOffset == ArgOffset);
21593 }
21594 }
21595
21596 // If we failed to find a match, see if we can replace an UNDEF shuffle
21597 // operand.
21598 if (ElementOffset == -1) {
21599 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21600 return false;
21601 ElementOffset = Mask.size();
21602 Y = InsertVal0;
21603 }
21604
21605 NewMask.assign(Mask.begin(), Mask.end());
21606 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21607 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21608 "NewMask[InsIndex] is out of bound");
21609 return true;
21610}
21611
21612// Merge an insertion into an existing shuffle:
21613// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21614// InsIndex)
21615// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21616// CONCAT_VECTORS.
21617SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21618 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21619 "Expected extract_vector_elt");
21620 SDValue InsertVal = N->getOperand(1);
21621 SDValue Vec = N->getOperand(0);
21622
21623 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21624 if (!SVN || !Vec.hasOneUse())
21625 return SDValue();
21626
21627 ArrayRef<int> Mask = SVN->getMask();
21628 SDValue X = Vec.getOperand(0);
21629 SDValue Y = Vec.getOperand(1);
21630
21631 SmallVector<int, 16> NewMask(Mask);
21632 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21633 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21634 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21635 if (LegalShuffle)
21636 return LegalShuffle;
21637 }
21638
21639 return SDValue();
21640}
21641
21642// Convert a disguised subvector insertion into a shuffle:
21643// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21644// bitcast(shuffle (bitcast V), (extended X), Mask)
21645// Note: We do not use an insert_subvector node because that requires a
21646// legal subvector type.
21647SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21648 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21649 "Expected extract_vector_elt");
21650 SDValue InsertVal = N->getOperand(1);
21651
21652 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21653 !InsertVal.getOperand(0).getValueType().isVector())
21654 return SDValue();
21655
21656 SDValue SubVec = InsertVal.getOperand(0);
21657 SDValue DestVec = N->getOperand(0);
21658 EVT SubVecVT = SubVec.getValueType();
21659 EVT VT = DestVec.getValueType();
21660 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21661 // If the source has only a single vector element, the cost of creating a
21662 // vector and adding it is likely to exceed the cost of an insert_vector_elt.
21663 if (NumSrcElts == 1)
21664 return SDValue();
21665 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21666 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21667
21668 // Step 1: Create a shuffle mask that implements this insert operation. The
21669 // vector that we are inserting into will be operand 0 of the shuffle, so
21670 // those elements are just 'i'. The inserted subvector is in the first
21671 // positions of operand 1 of the shuffle. Example:
21672 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21673 SmallVector<int, 16> Mask(NumMaskVals);
21674 for (unsigned i = 0; i != NumMaskVals; ++i) {
21675 if (i / NumSrcElts == InsIndex)
21676 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21677 else
21678 Mask[i] = i;
21679 }
21680
21681 // Bail out if the target can not handle the shuffle we want to create.
21682 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21683 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21684 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21685 return SDValue();
21686
21687 // Step 2: Create a wide vector from the inserted source vector by appending
21688 // undefined elements. This is the same size as our destination vector.
21689 SDLoc DL(N);
21690 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21691 ConcatOps[0] = SubVec;
21692 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21693
21694 // Step 3: Shuffle in the padded subvector.
21695 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21696 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21697 AddToWorklist(PaddedSubV.getNode());
21698 AddToWorklist(DestVecBC.getNode());
21699 AddToWorklist(Shuf.getNode());
21700 return DAG.getBitcast(VT, Shuf);
21701}
21702
21703// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21704// possible and the new load will be quick. We use more loads but less shuffles
21705// and inserts.
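// Illustrative sketch, not part of the original source: for a v4i32 result,
//   insert (shuffle (load p), undef, <u,0,1,2>), (load p-4), 0
// describes four consecutive i32 values starting at p-4, so it can become a
// single v4i32 load from p-4 when such an access is legal and fast.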
21706SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21707 EVT VT = N->getValueType(0);
21708
21709 // InsIndex is expected to be the first or last lane.
21710 if (!VT.isFixedLengthVector() ||
21711 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21712 return SDValue();
21713
21714 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21715 // depending on the InsIndex.
21716 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21717 SDValue Scalar = N->getOperand(1);
21718 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21719 return InsIndex == P.index() || P.value() < 0 ||
21720 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21721 (InsIndex == VT.getVectorNumElements() - 1 &&
21722 P.value() == (int)P.index() + 1);
21723 }))
21724 return SDValue();
21725
21726 // We optionally skip over an extend so long as both loads are extended in the
21727 // same way from the same type.
21728 unsigned Extend = 0;
21729 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21730 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21731 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21732 Extend = Scalar.getOpcode();
21733 Scalar = Scalar.getOperand(0);
21734 }
21735
21736 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21737 if (!ScalarLoad)
21738 return SDValue();
21739
21740 SDValue Vec = Shuffle->getOperand(0);
21741 if (Extend) {
21742 if (Vec.getOpcode() != Extend)
21743 return SDValue();
21744 Vec = Vec.getOperand(0);
21745 }
21746 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21747 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21748 return SDValue();
21749
21750 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21751 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21752 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21753 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21754 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21755 return SDValue();
21756
21757 // Check that the offset between the pointers is such that together they
21758 // produce a single contiguous load.
21759 if (InsIndex == 0) {
21760 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21761 -1))
21762 return SDValue();
21763 } else {
21764 if (!DAG.areNonVolatileConsecutiveLoads(
21765 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21766 return SDValue();
21767 }
21768
21769 // And that the new unaligned load will be fast.
21770 unsigned IsFast = 0;
21771 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21772 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21773 Vec.getValueType(), VecLoad->getAddressSpace(),
21774 NewAlign, VecLoad->getMemOperand()->getFlags(),
21775 &IsFast) ||
21776 !IsFast)
21777 return SDValue();
21778
21779 // Calculate the new Ptr and create the new load.
21780 SDLoc DL(N);
21781 SDValue Ptr = ScalarLoad->getBasePtr();
21782 if (InsIndex != 0)
21783 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21784 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21785 MachinePointerInfo PtrInfo =
21786 InsIndex == 0 ? ScalarLoad->getPointerInfo()
21787 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21788
21789 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21790 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21791 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21792 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21793 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21794}
21795
21796SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21797 SDValue InVec = N->getOperand(0);
21798 SDValue InVal = N->getOperand(1);
21799 SDValue EltNo = N->getOperand(2);
21800 SDLoc DL(N);
21801
21802 EVT VT = InVec.getValueType();
21803 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21804
21805 // Insert into out-of-bounds element is undefined.
21806 if (IndexC && VT.isFixedLengthVector() &&
21807 IndexC->getZExtValue() >= VT.getVectorNumElements())
21808 return DAG.getUNDEF(VT);
21809
21810 // Remove redundant insertions:
21811 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21812 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21813 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21814 return InVec;
21815
21816 if (!IndexC) {
21817 // If this is variable insert to undef vector, it might be better to splat:
21818 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21819 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21820 return DAG.getSplat(VT, DL, InVal);
21821 return SDValue();
21822 }
21823
21824 if (VT.isScalableVector())
21825 return SDValue();
21826
21827 unsigned NumElts = VT.getVectorNumElements();
21828
21829 // We must know which element is being inserted for folds below here.
21830 unsigned Elt = IndexC->getZExtValue();
21831
21832 // Handle <1 x ???> vector insertion special cases.
21833 if (NumElts == 1) {
21834 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21835 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21836 InVal.getOperand(0).getValueType() == VT &&
21837 isNullConstant(InVal.getOperand(1)))
21838 return InVal.getOperand(0);
21839 }
21840
21841 // Canonicalize insert_vector_elt dag nodes.
21842 // Example:
21843 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21844 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21845 //
21846 // Do this only if the child insert_vector node has one use; also
21847 // do this only if indices are both constants and Idx1 < Idx0.
21848 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21849 && isa<ConstantSDNode>(InVec.getOperand(2))) {
21850 unsigned OtherElt = InVec.getConstantOperandVal(2);
21851 if (Elt < OtherElt) {
21852 // Swap nodes.
21853 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21854 InVec.getOperand(0), InVal, EltNo);
21855 AddToWorklist(NewOp.getNode());
21856 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21857 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21858 }
21859 }
21860
21861 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21862 return Shuf;
21863
21864 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21865 return Shuf;
21866
21867 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21868 return Shuf;
21869
21870 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
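// Illustrative sketch, not part of the original source: for v2i32,
//   (insert_vector_elt (insert_vector_elt undef, a, 0), b, 1)
// collects both elements and becomes (build_vector a, b).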
21871 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
21872 // vXi1 vector - we don't need to recurse.
21873 if (NumElts == 1)
21874 return DAG.getBuildVector(VT, DL, {InVal});
21875
21876 // If we haven't already collected the element, insert into the op list.
21877 EVT MaxEltVT = InVal.getValueType();
21878 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21879 unsigned Idx) {
21880 if (!Ops[Idx]) {
21881 Ops[Idx] = Elt;
21882 if (VT.isInteger()) {
21883 EVT EltVT = Elt.getValueType();
21884 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21885 }
21886 }
21887 };
21888
21889 // Ensure all the operands are the same value type, fill any missing
21890 // operands with UNDEF and create the BUILD_VECTOR.
21891 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21892 assert(Ops.size() == NumElts && "Unexpected vector size");
21893 for (SDValue &Op : Ops) {
21894 if (Op)
21895 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21896 else
21897 Op = DAG.getUNDEF(MaxEltVT);
21898 }
21899 return DAG.getBuildVector(VT, DL, Ops);
21900 };
21901
21902 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21903 Ops[Elt] = InVal;
21904
21905 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
21906 for (SDValue CurVec = InVec; CurVec;) {
21907 // UNDEF - build new BUILD_VECTOR from already inserted operands.
21908 if (CurVec.isUndef())
21909 return CanonicalizeBuildVector(Ops);
21910
21911 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21912 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21913 for (unsigned I = 0; I != NumElts; ++I)
21914 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21915 return CanonicalizeBuildVector(Ops);
21916 }
21917
21918 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21919 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21920 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21921 return CanonicalizeBuildVector(Ops);
21922 }
21923
21924 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21925 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21926 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21927 if (CurIdx->getAPIntValue().ult(NumElts)) {
21928 unsigned Idx = CurIdx->getZExtValue();
21929 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21930
21931 // Found entire BUILD_VECTOR.
21932 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21933 return CanonicalizeBuildVector(Ops);
21934
21935 CurVec = CurVec->getOperand(0);
21936 continue;
21937 }
21938
21939 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21940 // update the shuffle mask (and second operand if we started with unary
21941 // shuffle) and create a new legal shuffle.
21942 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21943 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21944 SDValue LHS = SVN->getOperand(0);
21945 SDValue RHS = SVN->getOperand(1);
21946 SmallVector<int, 16> Mask(SVN->getMask());
21947 bool Merged = true;
21948 for (auto I : enumerate(Ops)) {
21949 SDValue &Op = I.value();
21950 if (Op) {
21951 SmallVector<int, 16> NewMask;
21952 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21953 Merged = false;
21954 break;
21955 }
21956 Mask = std::move(NewMask);
21957 }
21958 }
21959 if (Merged)
21960 if (SDValue NewShuffle =
21961 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21962 return NewShuffle;
21963 }
21964
21965 // If all insertions are zero value, try to convert to AND mask.
21966 // TODO: Do this for -1 with OR mask?
21967 if (!LegalOperations && llvm::isNullConstant(InVal) &&
21968 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21969 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21970 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21971 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21972 SmallVector<SDValue, 8> Mask(NumElts);
21973 for (unsigned I = 0; I != NumElts; ++I)
21974 Mask[I] = Ops[I] ? Zero : AllOnes;
21975 return DAG.getNode(ISD::AND, DL, VT, CurVec,
21976 DAG.getBuildVector(VT, DL, Mask));
21977 }
21978
21979 // Failed to find a match in the chain - bail.
21980 break;
21981 }
21982
21983 // See if we can fill in the missing constant elements as zeros.
21984 // TODO: Should we do this for any constant?
21985 APInt DemandedZeroElts = APInt::getZero(NumElts);
21986 for (unsigned I = 0; I != NumElts; ++I)
21987 if (!Ops[I])
21988 DemandedZeroElts.setBit(I);
21989
21990 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
21991 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
21992 : DAG.getConstantFP(0, DL, MaxEltVT);
21993 for (unsigned I = 0; I != NumElts; ++I)
21994 if (!Ops[I])
21995 Ops[I] = Zero;
21996
21997 return CanonicalizeBuildVector(Ops);
21998 }
21999 }
22000
22001 return SDValue();
22002}
22003
22004SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22005 SDValue EltNo,
22006 LoadSDNode *OriginalLoad) {
22007 assert(OriginalLoad->isSimple());
22008
22009 EVT ResultVT = EVE->getValueType(0);
22010 EVT VecEltVT = InVecVT.getVectorElementType();
22011
22012 // If the vector element type is not a multiple of a byte then we are unable
22013 // to correctly compute an address to load only the extracted element as a
22014 // scalar.
22015 if (!VecEltVT.isByteSized())
22016 return SDValue();
22017
22018 ISD::LoadExtType ExtTy =
22019 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22020 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22021 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22022 return SDValue();
22023
22024 Align Alignment = OriginalLoad->getAlign();
22025 MachinePointerInfo MPI;
22026 SDLoc DL(EVE);
22027 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22028 int Elt = ConstEltNo->getZExtValue();
22029 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22030 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22031 Alignment = commonAlignment(Alignment, PtrOff);
22032 } else {
22033 // Discard the pointer info except the address space because the memory
22034 // operand can't represent this new access since the offset is variable.
22035 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22036 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22037 }
22038
22039 unsigned IsFast = 0;
22040 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22041 OriginalLoad->getAddressSpace(), Alignment,
22042 OriginalLoad->getMemOperand()->getFlags(),
22043 &IsFast) ||
22044 !IsFast)
22045 return SDValue();
22046
22047 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22048 InVecVT, EltNo);
22049
22050 // We are replacing a vector load with a scalar load. The new load must have
22051 // identical memory op ordering to the original.
22052 SDValue Load;
22053 if (ResultVT.bitsGT(VecEltVT)) {
22054 // If the result type of vextract is wider than the load, then issue an
22055 // extending load instead.
22056 ISD::LoadExtType ExtType =
22057 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22058 : ISD::EXTLOAD;
22059 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22060 NewPtr, MPI, VecEltVT, Alignment,
22061 OriginalLoad->getMemOperand()->getFlags(),
22062 OriginalLoad->getAAInfo());
22063 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22064 } else {
22065 // The result type is narrower or the same width as the vector element
22066 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22067 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22068 OriginalLoad->getAAInfo());
22069 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22070 if (ResultVT.bitsLT(VecEltVT))
22071 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22072 else
22073 Load = DAG.getBitcast(ResultVT, Load);
22074 }
22075 ++OpsNarrowed;
22076 return Load;
22077}
22078
22079/// Transform a vector binary operation into a scalar binary operation by moving
22080/// the math/logic after an extract element of a vector.
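// Illustrative sketch, not part of the original source:
//   extract_vector_elt (add X, <1,2,3,4>), 2
// becomes add (extract_vector_elt X, 2), 3, because the extract of the
// constant operand constant-folds and only a scalar add remains.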
22081 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22082 bool LegalOperations) {
22083 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22084 SDValue Vec = ExtElt->getOperand(0);
22085 SDValue Index = ExtElt->getOperand(1);
22086 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22087 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22088 Vec->getNumValues() != 1)
22089 return SDValue();
22090
22091 // Targets may want to avoid this to prevent an expensive register transfer.
22092 if (!TLI.shouldScalarizeBinop(Vec))
22093 return SDValue();
22094
22095 // Extracting an element of a vector constant is constant-folded, so this
22096 // transform is just replacing a vector op with a scalar op while moving the
22097 // extract.
22098 SDValue Op0 = Vec.getOperand(0);
22099 SDValue Op1 = Vec.getOperand(1);
22100 APInt SplatVal;
22101 if (isAnyConstantBuildVector(Op0, true) ||
22102 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22103 isAnyConstantBuildVector(Op1, true) ||
22104 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22105 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22106 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22107 SDLoc DL(ExtElt);
22108 EVT VT = ExtElt->getValueType(0);
22109 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22110 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22111 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22112 }
22113
22114 return SDValue();
22115}
22116
22117 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22118 // recursively analyse all of its users and try to model them as
22119 // bit sequence extractions. If all of them agree on the new, narrower element
22120// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22121// new element type, do so now.
22122 // This is mainly useful to recover from legalization that scalarized
22123 // the vector as wide elements; this combine tries to rebuild it with narrower elements.
22124//
22125// Some more nodes could be modelled if that helps cover interesting patterns.
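// Illustrative sketch, not part of the original source: if an i64 element
// extracted from a v2i64 vector is only used as (trunc ... to i32) and
// (trunc (srl ..., 32) to i32) feeding BUILD_VECTORs, both users can be
// re-modelled as i32 extracts from the same vector bitcast to v4i32.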
22126bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22127 SDNode *N) {
22128 // We perform this optimization post type-legalization because
22129 // the type-legalizer often scalarizes integer-promoted vectors.
22130 // Performing this optimization before may cause legalization cycles.
22131 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22132 return false;
22133
22134 // TODO: Add support for big-endian.
22135 if (DAG.getDataLayout().isBigEndian())
22136 return false;
22137
22138 SDValue VecOp = N->getOperand(0);
22139 EVT VecVT = VecOp.getValueType();
22140 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22141
22142 // We must start with a constant extraction index.
22143 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22144 if (!IndexC)
22145 return false;
22146
22147 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22148 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22149
22150 // TODO: deal with the case of implicit anyext of the extraction.
22151 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22152 EVT ScalarVT = N->getValueType(0);
22153 if (VecVT.getScalarType() != ScalarVT)
22154 return false;
22155
22156 // TODO: deal with the cases other than everything being integer-typed.
22157 if (!ScalarVT.isScalarInteger())
22158 return false;
22159
22160 struct Entry {
22161 SDNode *Producer;
22162
22163 // Which bits of VecOp does it contain?
22164 unsigned BitPos;
22165 int NumBits;
22166 // NOTE: the actual width of \p Producer may be wider than NumBits!
22167
22168 Entry(Entry &&) = default;
22169 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22170 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22171
22172 Entry() = delete;
22173 Entry(const Entry &) = delete;
22174 Entry &operator=(const Entry &) = delete;
22175 Entry &operator=(Entry &&) = delete;
22176 };
22177 SmallVector<Entry, 32> Worklist;
22178 SmallVector<Entry, 32> Leafs;
22179
22180 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22181 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22182 /*NumBits=*/VecEltBitWidth);
22183
22184 while (!Worklist.empty()) {
22185 Entry E = Worklist.pop_back_val();
22186 // Does the node not even use any of the VecOp bits?
22187 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22188 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22189 return false; // Let's allow the other combines to clean this up first.
22190 // Did we fail to model any of the users of the Producer?
22191 bool ProducerIsLeaf = false;
22192 // Look at each user of this Producer.
22193 for (SDNode *User : E.Producer->uses()) {
22194 switch (User->getOpcode()) {
22195 // TODO: support ISD::BITCAST
22196 // TODO: support ISD::ANY_EXTEND
22197 // TODO: support ISD::ZERO_EXTEND
22198 // TODO: support ISD::SIGN_EXTEND
22199 case ISD::TRUNCATE:
22200 // Truncation simply means we keep position, but extract less bits.
22201 Worklist.emplace_back(User, E.BitPos,
22202 /*NumBits=*/User->getValueSizeInBits(0));
22203 break;
22204 // TODO: support ISD::SRA
22205 // TODO: support ISD::SHL
22206 case ISD::SRL:
22207 // We should be shifting the Producer by a constant amount.
22208 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22209 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22210 // Logical right-shift means that we start extraction later,
22211 // but stop it at the same position we did previously.
22212 unsigned ShAmt = ShAmtC->getZExtValue();
22213 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22214 break;
22215 }
22216 [[fallthrough]];
22217 default:
22218 // We can not model this user of the Producer.
22219 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22220 ProducerIsLeaf = true;
22221 // Profitability check: all users that we can not model
22222 // must be ISD::BUILD_VECTOR's.
22223 if (User->getOpcode() != ISD::BUILD_VECTOR)
22224 return false;
22225 break;
22226 }
22227 }
22228 if (ProducerIsLeaf)
22229 Leafs.emplace_back(std::move(E));
22230 }
22231
22232 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22233
22234 // If we are still at the same element granularity, give up.
22235 if (NewVecEltBitWidth == VecEltBitWidth)
22236 return false;
22237
22238 // The vector width must be a multiple of the new element width.
22239 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22240 return false;
22241
22242 // All leafs must agree on the new element width.
22243 // All leafs must not expect any "padding" bits on top of that width.
22244 // All leafs must start extraction from multiple of that width.
22245 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22246 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22247 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22248 E.BitPos % NewVecEltBitWidth == 0;
22249 }))
22250 return false;
22251
22252 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22253 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22254 VecVT.getSizeInBits() / NewVecEltBitWidth);
22255
22256 if (LegalTypes &&
22257 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22258 return false;
22259
22260 if (LegalOperations &&
22261 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22262 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22263 return false;
22264
22265 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22266 for (const Entry &E : Leafs) {
22267 SDLoc DL(E.Producer);
22268 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22269 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22270 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22271 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22272 DAG.getVectorIdxConstant(NewIndex, DL));
22273 CombineTo(E.Producer, V);
22274 }
22275
22276 return true;
22277}
22278
22279SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22280 SDValue VecOp = N->getOperand(0);
22281 SDValue Index = N->getOperand(1);
22282 EVT ScalarVT = N->getValueType(0);
22283 EVT VecVT = VecOp.getValueType();
22284 if (VecOp.isUndef())
22285 return DAG.getUNDEF(ScalarVT);
22286
22287 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22288 //
22289 // This only really matters if the index is non-constant since other combines
22290 // on the constant elements already work.
22291 SDLoc DL(N);
22292 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22293 Index == VecOp.getOperand(2)) {
22294 SDValue Elt = VecOp.getOperand(1);
22295 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22296 }
22297
22298 // (vextract (scalar_to_vector val, 0) -> val
22299 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22300 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22301 if (DAG.isKnownNeverZero(Index))
22302 return DAG.getUNDEF(ScalarVT);
22303
22304 // Check if the result type doesn't match the inserted element type.
22305 // The inserted element and extracted element may have mismatched bitwidth.
22306 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted vector.
22307 SDValue InOp = VecOp.getOperand(0);
22308 if (InOp.getValueType() != ScalarVT) {
22309 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22310 if (InOp.getValueType().bitsGT(ScalarVT))
22311 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22312 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22313 }
22314 return InOp;
22315 }
22316
22317 // extract_vector_elt of out-of-bounds element -> UNDEF
22318 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22319 if (IndexC && VecVT.isFixedLengthVector() &&
22320 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22321 return DAG.getUNDEF(ScalarVT);
22322
22323 // extract_vector_elt (build_vector x, y), 1 -> y
22324 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22325 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22326 TLI.isTypeLegal(VecVT)) {
22327 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22328 VecVT.isFixedLengthVector()) &&
22329 "BUILD_VECTOR used for scalable vectors");
22330 unsigned IndexVal =
22331 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22332 SDValue Elt = VecOp.getOperand(IndexVal);
22333 EVT InEltVT = Elt.getValueType();
22334
22335 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22336 isNullConstant(Elt)) {
22337 // Sometimes build_vector's scalar input types do not match result type.
22338 if (ScalarVT == InEltVT)
22339 return Elt;
22340
22341 // TODO: It may be useful to truncate if free if the build_vector
22342 // implicitly converts.
22343 }
22344 }
22345
22346 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22347 return BO;
22348
22349 if (VecVT.isScalableVector())
22350 return SDValue();
22351
22352 // All the code from this point onwards assumes fixed width vectors, but it's
22353 // possible that some of the combinations could be made to work for scalable
22354 // vectors too.
22355 unsigned NumElts = VecVT.getVectorNumElements();
22356 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22357
22358 // See if the extracted element is constant, in which case fold it if it is
22359 // a legal fp immediate.
22360 if (IndexC && ScalarVT.isFloatingPoint()) {
22361 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22362 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22363 if (KnownElt.isConstant()) {
22364 APFloat CstFP =
22365 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22366 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22367 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22368 }
22369 }
22370
22371 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22372 // there are regressions on multiple targets without it. We can end up with a
22373 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22374 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22375 VecOp.hasOneUse()) {
22376 // The vector index of the LSBs of the source depend on the endian-ness.
22377 bool IsLE = DAG.getDataLayout().isLittleEndian();
22378 unsigned ExtractIndex = IndexC->getZExtValue();
22379 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22380 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22381 SDValue BCSrc = VecOp.getOperand(0);
22382 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22383 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22384
22385 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22386 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22387 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22388 // trunc i64 X to i32
22389 SDValue X = BCSrc.getOperand(0);
22390 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22391 "Extract element and scalar to vector can't change element type "
22392 "from FP to integer.");
22393 unsigned XBitWidth = X.getValueSizeInBits();
22394 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22395
22396 // An extract element return value type can be wider than its vector
22397 // operand element type. In that case, the high bits are undefined, so
22398 // it's possible that we may need to extend rather than truncate.
22399 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22400 assert(XBitWidth % VecEltBitWidth == 0 &&
22401 "Scalar bitwidth must be a multiple of vector element bitwidth");
22402 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22403 }
22404 }
22405 }
22406
22407 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22408 // We only perform this optimization before the op legalization phase because
22409 // we may introduce new vector instructions which are not backed by TD
22410 // patterns. For example on AVX, extracting elements from a wide vector
22411 // without using extract_subvector. However, if we can find an underlying
22412 // scalar value, then we can always use that.
22413 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22414 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22415 // Find the new index to extract from.
22416 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22417
22418 // Extracting an undef index is undef.
22419 if (OrigElt == -1)
22420 return DAG.getUNDEF(ScalarVT);
22421
22422 // Select the right vector half to extract from.
22423 SDValue SVInVec;
22424 if (OrigElt < (int)NumElts) {
22425 SVInVec = VecOp.getOperand(0);
22426 } else {
22427 SVInVec = VecOp.getOperand(1);
22428 OrigElt -= NumElts;
22429 }
22430
22431 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22432 SDValue InOp = SVInVec.getOperand(OrigElt);
22433 if (InOp.getValueType() != ScalarVT) {
22434 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22435 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22436 }
22437
22438 return InOp;
22439 }
22440
22441 // FIXME: We should handle recursing on other vector shuffles and
22442 // scalar_to_vector here as well.
22443
22444 if (!LegalOperations ||
22445 // FIXME: Should really be just isOperationLegalOrCustom.
22446 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22447 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
22448 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22449 DAG.getVectorIdxConstant(OrigElt, DL));
22450 }
22451 }
22452
22453 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22454 // simplify it based on the (valid) extraction indices.
22455 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22456 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22457 Use->getOperand(0) == VecOp &&
22458 isa<ConstantSDNode>(Use->getOperand(1));
22459 })) {
22460 APInt DemandedElts = APInt::getZero(NumElts);
22461 for (SDNode *Use : VecOp->uses()) {
22462 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22463 if (CstElt->getAPIntValue().ult(NumElts))
22464 DemandedElts.setBit(CstElt->getZExtValue());
22465 }
22466 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22467 // We simplified the vector operand of this extract element. If this
22468 // extract is not dead, visit it again so it is folded properly.
22469 if (N->getOpcode() != ISD::DELETED_NODE)
22470 AddToWorklist(N);
22471 return SDValue(N, 0);
22472 }
22473 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22474 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22475 // We simplified the vector operand of this extract element. If this
22476 // extract is not dead, visit it again so it is folded properly.
22477 if (N->getOpcode() != ISD::DELETED_NODE)
22478 AddToWorklist(N);
22479 return SDValue(N, 0);
22480 }
22481 }
22482
22483 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22484 return SDValue(N, 0);
22485
22486 // Everything under here is trying to match an extract of a loaded value.
22487 // If the result of the load has to be truncated, then it's not necessarily
22488 // profitable.
22489 bool BCNumEltsChanged = false;
22490 EVT ExtVT = VecVT.getVectorElementType();
22491 EVT LVT = ExtVT;
22492 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22493 return SDValue();
22494
22495 if (VecOp.getOpcode() == ISD::BITCAST) {
22496 // Don't duplicate a load with other uses.
22497 if (!VecOp.hasOneUse())
22498 return SDValue();
22499
22500 EVT BCVT = VecOp.getOperand(0).getValueType();
22501 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22502 return SDValue();
22503 if (NumElts != BCVT.getVectorNumElements())
22504 BCNumEltsChanged = true;
22505 VecOp = VecOp.getOperand(0);
22506 ExtVT = BCVT.getVectorElementType();
22507 }
22508
22509 // extract (vector load $addr), i --> load $addr + i * size
22510 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22511 ISD::isNormalLoad(VecOp.getNode()) &&
22512 !Index->hasPredecessor(VecOp.getNode())) {
22513 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22514 if (VecLoad && VecLoad->isSimple())
22515 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22516 }
22517
22518 // Perform only after legalization to ensure build_vector / vector_shuffle
22519 // optimizations have already been done.
22520 if (!LegalOperations || !IndexC)
22521 return SDValue();
22522
22523 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22524 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22525 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22526 int Elt = IndexC->getZExtValue();
22527 LoadSDNode *LN0 = nullptr;
22528 if (ISD::isNormalLoad(VecOp.getNode())) {
22529 LN0 = cast<LoadSDNode>(VecOp);
22530 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22531 VecOp.getOperand(0).getValueType() == ExtVT &&
22532 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22533 // Don't duplicate a load with other uses.
22534 if (!VecOp.hasOneUse())
22535 return SDValue();
22536
22537 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22538 }
22539 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22540 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22541 // =>
22542 // (load $addr+1*size)
22543
22544 // Don't duplicate a load with other uses.
22545 if (!VecOp.hasOneUse())
22546 return SDValue();
22547
22548 // If the bit convert changed the number of elements, it is unsafe
22549 // to examine the mask.
22550 if (BCNumEltsChanged)
22551 return SDValue();
22552
22553 // Select the input vector, guarding against an out-of-range extract index.
22554 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22555 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22556
22557 if (VecOp.getOpcode() == ISD::BITCAST) {
22558 // Don't duplicate a load with other uses.
22559 if (!VecOp.hasOneUse())
22560 return SDValue();
22561
22562 VecOp = VecOp.getOperand(0);
22563 }
22564 if (ISD::isNormalLoad(VecOp.getNode())) {
22565 LN0 = cast<LoadSDNode>(VecOp);
22566 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22567 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22568 }
22569 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22570 VecVT.getVectorElementType() == ScalarVT &&
22571 (!LegalTypes ||
22572 TLI.isTypeLegal(
22573 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22574 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22575 // -> extract_vector_elt a, 0
22576 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22577 // -> extract_vector_elt a, 1
22578 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22579 // -> extract_vector_elt b, 0
22580 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22581 // -> extract_vector_elt b, 1
22582 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22583 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22584 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22585 Index.getValueType());
22586
22587 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22588 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22589 ConcatVT.getVectorElementType(),
22590 ConcatOp, NewIdx);
22591 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22592 }
22593
22594 // Make sure we found a non-volatile load and the extractelement is
22595 // the only use.
22596 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22597 return SDValue();
22598
22599 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22600 if (Elt == -1)
22601 return DAG.getUNDEF(LVT);
22602
22603 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22604}
22605
22606// Simplify (build_vec (ext )) to (bitcast (build_vec ))
22607SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22608 // We perform this optimization post type-legalization because
22609 // the type-legalizer often scalarizes integer-promoted vectors.
22610 // Performing this optimization before may create bit-casts which
22611 // will be type-legalized to complex code sequences.
22612 // We perform this optimization only before the operation legalizer because we
22613 // may introduce illegal operations.
22614 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22615 return SDValue();
22616
22617 unsigned NumInScalars = N->getNumOperands();
22618 SDLoc DL(N);
22619 EVT VT = N->getValueType(0);
22620
22621 // Check to see if this is a BUILD_VECTOR of a bunch of values
22622 // which come from any_extend or zero_extend nodes. If so, we can create
22623 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22624 // optimizations. We do not handle sign-extend because we can't fill the sign
22625 // using shuffles.
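// E.g., on a little-endian target, and assuming v8i16 is a legal type:
//   (v4i32 build_vector (zext i16 A), (zext i16 B), (zext i16 C), (zext i16 D))
//   --> (v4i32 bitcast (v8i16 build_vector A, 0, B, 0, C, 0, D, 0))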
22626 EVT SourceType = MVT::Other;
22627 bool AllAnyExt = true;
22628
22629 for (unsigned i = 0; i != NumInScalars; ++i) {
22630 SDValue In = N->getOperand(i);
22631 // Ignore undef inputs.
22632 if (In.isUndef()) continue;
22633
22634 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22635 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22636
22637 // Abort if the element is not an extension.
22638 if (!ZeroExt && !AnyExt) {
22639 SourceType = MVT::Other;
22640 break;
22641 }
22642
22643 // The input is a ZeroExt or AnyExt. Check the original type.
22644 EVT InTy = In.getOperand(0).getValueType();
22645
22646 // Check that all of the widened source types are the same.
22647 if (SourceType == MVT::Other)
22648 // First time.
22649 SourceType = InTy;
22650 else if (InTy != SourceType) {
22651 // Multiple incoming types. Abort.
22652 SourceType = MVT::Other;
22653 break;
22654 }
22655
22656 // Check if all of the extends are ANY_EXTENDs.
22657 AllAnyExt &= AnyExt;
22658 }
22659
22660 // In order to have valid types, all of the inputs must be extended from the
22661 // same source type and all of the inputs must be any or zero extend.
22662 // Scalar sizes must be a power of two.
22663 EVT OutScalarTy = VT.getScalarType();
22664 bool ValidTypes =
22665 SourceType != MVT::Other &&
22666 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22667 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22668
22669 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22670 // turn into a single shuffle instruction.
22671 if (!ValidTypes)
22672 return SDValue();
22673
22674 // If we already have a splat buildvector, then don't fold it if it means
22675 // introducing zeros.
22676 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22677 return SDValue();
22678
22679 bool isLE = DAG.getDataLayout().isLittleEndian();
22680 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22681 assert(ElemRatio > 1 && "Invalid element size ratio");
22682 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22683 DAG.getConstant(0, DL, SourceType);
22684
22685 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22686 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22687
22688 // Populate the new build_vector
22689 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22690 SDValue Cast = N->getOperand(i);
22691 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22692 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22693 Cast.isUndef()) && "Invalid cast opcode");
22694 SDValue In;
22695 if (Cast.isUndef())
22696 In = DAG.getUNDEF(SourceType);
22697 else
22698 In = Cast->getOperand(0);
22699 unsigned Index = isLE ? (i * ElemRatio) :
22700 (i * ElemRatio + (ElemRatio - 1));
22701
22702 assert(Index < Ops.size() && "Invalid index");
22703 Ops[Index] = In;
22704 }
22705
22706 // The type of the new BUILD_VECTOR node.
22707 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22708 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22709 "Invalid vector size");
22710 // Check if the new vector type is legal.
22711 if (!isTypeLegal(VecVT) ||
22712 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22713 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22714 return SDValue();
22715
22716 // Make the new BUILD_VECTOR.
22717 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22718
22719 // The new BUILD_VECTOR node has the potential to be further optimized.
22720 AddToWorklist(BV.getNode());
22721 // Bitcast to the desired type.
22722 return DAG.getBitcast(VT, BV);
22723}
22724
22725// Simplify (build_vec (trunc $1)
22726// (trunc (srl $1 half-width))
22727// (trunc (srl $1 (2 * half-width))))
22728// to (bitcast $1)
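// E.g., on a little-endian target:
//   (v2i32 build_vector (trunc i64 X), (trunc (srl i64 X, 32)))
//   --> (v2i32 bitcast X)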
22729SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22730 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22731
22732 EVT VT = N->getValueType(0);
22733
22734 // Don't run this before LegalizeTypes if VT is legal.
22735 // Targets may have other preferences.
22736 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22737 return SDValue();
22738
22739 // Only for little endian
22740 if (!DAG.getDataLayout().isLittleEndian())
22741 return SDValue();
22742
22743 SDLoc DL(N);
22744 EVT OutScalarTy = VT.getScalarType();
22745 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22746
22747 // Only for power of two types to be sure that bitcast works well
22748 if (!isPowerOf2_64(ScalarTypeBitsize))
22749 return SDValue();
22750
22751 unsigned NumInScalars = N->getNumOperands();
22752
22753 // Look through bitcasts
22754 auto PeekThroughBitcast = [](SDValue Op) {
22755 if (Op.getOpcode() == ISD::BITCAST)
22756 return Op.getOperand(0);
22757 return Op;
22758 };
22759
22760 // The source value where all the parts are extracted.
22761 SDValue Src;
22762 for (unsigned i = 0; i != NumInScalars; ++i) {
22763 SDValue In = PeekThroughBitcast(N->getOperand(i));
22764 // Ignore undef inputs.
22765 if (In.isUndef()) continue;
22766
22767 if (In.getOpcode() != ISD::TRUNCATE)
22768 return SDValue();
22769
22770 In = PeekThroughBitcast(In.getOperand(0));
22771
22772 if (In.getOpcode() != ISD::SRL) {
22773 // For now only build_vec without shuffling, handle shifts here in the
22774 // future.
22775 if (i != 0)
22776 return SDValue();
22777
22778 Src = In;
22779 } else {
22780 // In is SRL
22781 SDValue part = PeekThroughBitcast(In.getOperand(0));
22782
22783 if (!Src) {
22784 Src = part;
22785 } else if (Src != part) {
22786 // Vector parts do not stem from the same variable
22787 return SDValue();
22788 }
22789
22790 SDValue ShiftAmtVal = In.getOperand(1);
22791 if (!isa<ConstantSDNode>(ShiftAmtVal))
22792 return SDValue();
22793
22794 uint64_t ShiftAmt = In.getConstantOperandVal(1);
22795
22796 // The extracted value is not extracted at the right position
22797 if (ShiftAmt != i * ScalarTypeBitsize)
22798 return SDValue();
22799 }
22800 }
22801
22802 // Only cast if the size is the same
22803 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22804 return SDValue();
22805
22806 return DAG.getBitcast(VT, Src);
22807}
22808
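// Helper for reduceBuildVecToShuffle: shuffle the pair of source vectors
// selected by LeftIdx (VecIn1/VecIn2) so that the build_vector elements drawn
// from them (per VectorMask) land in their final positions. Returns the
// shuffle, or SDValue() if the input and output types cannot be reconciled.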
22809SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22810 ArrayRef<int> VectorMask,
22811 SDValue VecIn1, SDValue VecIn2,
22812 unsigned LeftIdx, bool DidSplitVec) {
22813 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22814
22815 EVT VT = N->getValueType(0);
22816 EVT InVT1 = VecIn1.getValueType();
22817 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22818
22819 unsigned NumElems = VT.getVectorNumElements();
22820 unsigned ShuffleNumElems = NumElems;
22821
22822 // If we artificially split a vector in two already, then the offsets in the
22823 // operands will all be based off of VecIn1, even those in VecIn2.
22824 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22825
22826 uint64_t VTSize = VT.getFixedSizeInBits();
22827 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22828 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22829
22830 assert(InVT2Size <= InVT1Size &&
22831 "Inputs must be sorted to be in non-increasing vector size order.");
22832
22833 // We can't generate a shuffle node with mismatched input and output types.
22834 // Try to make the types match the type of the output.
22835 if (InVT1 != VT || InVT2 != VT) {
22836 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22837 // If the output vector length is a multiple of both input lengths,
22838 // we can concatenate them and pad the rest with undefs.
22839 unsigned NumConcats = VTSize / InVT1Size;
22840 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22841 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22842 ConcatOps[0] = VecIn1;
22843 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22844 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22845 VecIn2 = SDValue();
22846 } else if (InVT1Size == VTSize * 2) {
22847 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22848 return SDValue();
22849
22850 if (!VecIn2.getNode()) {
22851 // If we only have one input vector, and it's twice the size of the
22852 // output, split it in two.
22853 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22854 DAG.getVectorIdxConstant(NumElems, DL));
22855 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22856 // Since we now have shorter input vectors, adjust the offset of the
22857 // second vector's start.
22858 Vec2Offset = NumElems;
22859 } else {
22860 assert(InVT2Size <= InVT1Size &&
22861 "Second input is not going to be larger than the first one.");
22862
22863 // VecIn1 is wider than the output, and we have another, possibly
22864 // smaller input. Pad the smaller input with undefs, shuffle at the
22865 // input vector width, and extract the output.
22866 // The shuffle type is different than VT, so check legality again.
22867 if (LegalOperations &&
22868 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22869 return SDValue();
22870
22871 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22872 // lower it back into a BUILD_VECTOR. So if the inserted type is
22873 // illegal, don't even try.
22874 if (InVT1 != InVT2) {
22875 if (!TLI.isTypeLegal(InVT2))
22876 return SDValue();
22877 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22878 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22879 }
22880 ShuffleNumElems = NumElems * 2;
22881 }
22882 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22883 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22884 ConcatOps[0] = VecIn2;
22885 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22886 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22887 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22888 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22889 return SDValue();
22890 // If the dest vector has fewer than two elements, then using a shuffle and
22891 // extract from larger registers will cost even more.
22892 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22893 return SDValue();
22894 assert(InVT2Size <= InVT1Size &&
22895 "Second input is not going to be larger than the first one.");
22896
22897 // VecIn1 is wider than the output, and we have another, possibly
22898 // smaller input. Pad the smaller input with undefs, shuffle at the
22899 // input vector width, and extract the output.
22900 // The shuffle type is different than VT, so check legality again.
22901 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22902 return SDValue();
22903
22904 if (InVT1 != InVT2) {
22905 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22906 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22907 }
22908 ShuffleNumElems = InVT1Size / VTSize * NumElems;
22909 } else {
22910 // TODO: Support cases where the length mismatch isn't exactly by a
22911 // factor of 2.
22912 // TODO: Move this check upwards, so that if we have bad type
22913 // mismatches, we don't create any DAG nodes.
22914 return SDValue();
22915 }
22916 }
22917
22918 // Initialize mask to undef.
22919 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22920
22921 // Only need to run up to the number of elements actually used, not the
22922 // total number of elements in the shuffle - if we are shuffling a wider
22923 // vector, the high lanes should be set to undef.
22924 for (unsigned i = 0; i != NumElems; ++i) {
22925 if (VectorMask[i] <= 0)
22926 continue;
22927
22928 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22929 if (VectorMask[i] == (int)LeftIdx) {
22930 Mask[i] = ExtIndex;
22931 } else if (VectorMask[i] == (int)LeftIdx + 1) {
22932 Mask[i] = Vec2Offset + ExtIndex;
22933 }
22934 }
22935
22936 // The type the input vectors may have changed above.
22937 InVT1 = VecIn1.getValueType();
22938
22939 // If we already have a VecIn2, it should have the same type as VecIn1.
22940 // If we don't, get an undef/zero vector of the appropriate type.
22941 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22942 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22943
22944 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22945 if (ShuffleNumElems > NumElems)
22946 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22947
22948 return Shuffle;
22949}
22950
22951 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22952 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22953
22954 // First, determine where the build vector is not undef.
22955 // TODO: We could extend this to handle zero elements as well as undefs.
22956 int NumBVOps = BV->getNumOperands();
22957 int ZextElt = -1;
22958 for (int i = 0; i != NumBVOps; ++i) {
22959 SDValue Op = BV->getOperand(i);
22960 if (Op.isUndef())
22961 continue;
22962 if (ZextElt == -1)
22963 ZextElt = i;
22964 else
22965 return SDValue();
22966 }
22967 // Bail out if there's no non-undef element.
22968 if (ZextElt == -1)
22969 return SDValue();
22970
22971 // The build vector contains some number of undef elements and exactly
22972 // one other element. That other element must be a zero-extended scalar
22973 // extracted from a vector at a constant index to turn this into a shuffle.
22974 // Also, require that the build vector does not implicitly truncate/extend
22975 // its elements.
22976 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22977 EVT VT = BV->getValueType(0);
22978 SDValue Zext = BV->getOperand(ZextElt);
22979 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22980 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22981 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22982 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22983 return SDValue();
22984
22985 // The zero-extend must be a multiple of the source size, and we must be
22986 // building a vector of the same size as the source of the extract element.
22987 SDValue Extract = Zext.getOperand(0);
22988 unsigned DestSize = Zext.getValueSizeInBits();
22989 unsigned SrcSize = Extract.getValueSizeInBits();
22990 if (DestSize % SrcSize != 0 ||
22991 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
22992 return SDValue();
22993
22994 // Create a shuffle mask that will combine the extracted element with zeros
22995 // and undefs.
22996 int ZextRatio = DestSize / SrcSize;
22997 int NumMaskElts = NumBVOps * ZextRatio;
22998 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
22999 for (int i = 0; i != NumMaskElts; ++i) {
23000 if (i / ZextRatio == ZextElt) {
23001 // The low bits of the (potentially translated) extracted element map to
23002 // the source vector. The high bits map to zero. We will use a zero vector
23003 // as the 2nd source operand of the shuffle, so use the 1st element of
23004 // that vector (mask value is number-of-elements) for the high bits.
23005 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23006 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23007 : NumMaskElts;
23008 }
23009
23010 // Undef elements of the build vector remain undef because we initialize
23011 // the shuffle mask with -1.
23012 }
23013
23014 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23015 // bitcast (shuffle V, ZeroVec, VectorMask)
23016 SDLoc DL(BV);
23017 EVT VecVT = Extract.getOperand(0).getValueType();
23018 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23019 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23020 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23021 ZeroVec, ShufMask, DAG);
23022 if (!Shuf)
23023 return SDValue();
23024 return DAG.getBitcast(VT, Shuf);
23025}
23026
23027// FIXME: promote to STLExtras.
23028template <typename R, typename T>
23029static auto getFirstIndexOf(R &&Range, const T &Val) {
23030 auto I = find(Range, Val);
23031 if (I == Range.end())
23032 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23033 return std::distance(Range.begin(), I);
23034}
23035
23036// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23037// operations. If the types of the vectors we're extracting from allow it,
23038// turn this into a vector_shuffle node.
23039SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23040 SDLoc DL(N);
23041 EVT VT = N->getValueType(0);
23042
23043 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23044 if (!isTypeLegal(VT))
23045 return SDValue();
23046
23047 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23048 return V;
23049
23050 // May only combine to shuffle after legalize if shuffle is legal.
23051 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23052 return SDValue();
23053
23054 bool UsesZeroVector = false;
23055 unsigned NumElems = N->getNumOperands();
23056
23057 // Record, for each element of the newly built vector, which input vector
23058 // that element comes from. -1 stands for undef, 0 for the zero vector,
23059 // and positive values for the input vectors.
23060 // VectorMask maps each element to its vector number, and VecIn maps vector
23061 // numbers to their initial SDValues.
23062
23063 SmallVector<int, 8> VectorMask(NumElems, -1);
23064 SmallVector<SDValue, 8> VecIn;
23065 VecIn.push_back(SDValue());
23066
23067 for (unsigned i = 0; i != NumElems; ++i) {
23068 SDValue Op = N->getOperand(i);
23069
23070 if (Op.isUndef())
23071 continue;
23072
23073 // See if we can use a blend with a zero vector.
23074 // TODO: Should we generalize this to a blend with an arbitrary constant
23075 // vector?
23076 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23077 UsesZeroVector = true;
23078 VectorMask[i] = 0;
23079 continue;
23080 }
23081
23082 // Not an undef or zero. If the input is something other than an
23083 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23084 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23085 !isa<ConstantSDNode>(Op.getOperand(1)))
23086 return SDValue();
23087 SDValue ExtractedFromVec = Op.getOperand(0);
23088
23089 if (ExtractedFromVec.getValueType().isScalableVector())
23090 return SDValue();
23091
23092 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23093 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23094 return SDValue();
23095
23096 // All inputs must have the same element type as the output.
23097 if (VT.getVectorElementType() !=
23098 ExtractedFromVec.getValueType().getVectorElementType())
23099 return SDValue();
23100
23101 // Have we seen this input vector before?
23102 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23103 // a map back from SDValues to numbers isn't worth it.
23104 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23105 if (Idx == -1) { // A new source vector?
23106 Idx = VecIn.size();
23107 VecIn.push_back(ExtractedFromVec);
23108 }
23109
23110 VectorMask[i] = Idx;
23111 }
23112
23113 // If we didn't find at least one input vector, bail out.
23114 if (VecIn.size() < 2)
23115 return SDValue();
23116
23117 // If all the operands of the BUILD_VECTOR extract from the same
23118 // vector, then split the vector efficiently based on the maximum
23119 // vector access index and adjust the VectorMask and
23120 // VecIn accordingly.
23121 bool DidSplitVec = false;
23122 if (VecIn.size() == 2) {
23123 unsigned MaxIndex = 0;
23124 unsigned NearestPow2 = 0;
23125 SDValue Vec = VecIn.back();
23126 EVT InVT = Vec.getValueType();
23127 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23128
23129 for (unsigned i = 0; i < NumElems; i++) {
23130 if (VectorMask[i] <= 0)
23131 continue;
23132 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23133 IndexVec[i] = Index;
23134 MaxIndex = std::max(MaxIndex, Index);
23135 }
23136
23137 NearestPow2 = PowerOf2Ceil(MaxIndex);
23138 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23139 NumElems * 2 < NearestPow2) {
23140 unsigned SplitSize = NearestPow2 / 2;
23141 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23142 InVT.getVectorElementType(), SplitSize);
23143 if (TLI.isTypeLegal(SplitVT) &&
23144 SplitSize + SplitVT.getVectorNumElements() <=
23145 InVT.getVectorNumElements()) {
23146 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23147 DAG.getVectorIdxConstant(SplitSize, DL));
23148 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23149 DAG.getVectorIdxConstant(0, DL));
23150 VecIn.pop_back();
23151 VecIn.push_back(VecIn1);
23152 VecIn.push_back(VecIn2);
23153 DidSplitVec = true;
23154
23155 for (unsigned i = 0; i < NumElems; i++) {
23156 if (VectorMask[i] <= 0)
23157 continue;
23158 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23159 }
23160 }
23161 }
23162 }
23163
23164 // Sort input vectors by decreasing vector element count,
23165 // while preserving the relative order of equally-sized vectors.
23166 // Note that we keep the first "implicit" zero vector as-is.
23167 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23168 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23169 [](const SDValue &a, const SDValue &b) {
23170 return a.getValueType().getVectorNumElements() >
23171 b.getValueType().getVectorNumElements();
23172 });
23173
23174 // We now also need to rebuild the VectorMask, because it referenced element
23175 // order in VecIn, and we just sorted them.
23176 for (int &SourceVectorIndex : VectorMask) {
23177 if (SourceVectorIndex <= 0)
23178 continue;
23179 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23180 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23181 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23182 SourceVectorIndex = Idx;
23183 }
23184
23185 VecIn = std::move(SortedVecIn);
23186
23187 // TODO: Should this fire if some of the input vectors has illegal type (like
23188 // it does now), or should we let legalization run its course first?
23189
23190 // Shuffle phase:
23191 // Take pairs of vectors, and shuffle them so that the result has elements
23192 // from these vectors in the correct places.
23193 // For example, given:
23194 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23195 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23196 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23197 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23198 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23199 // We will generate:
23200 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23201 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23202 SmallVector<SDValue, 4> Shuffles;
23203 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23204 unsigned LeftIdx = 2 * In + 1;
23205 SDValue VecLeft = VecIn[LeftIdx];
23206 SDValue VecRight =
23207 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23208
23209 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23210 VecRight, LeftIdx, DidSplitVec))
23211 Shuffles.push_back(Shuffle);
23212 else
23213 return SDValue();
23214 }
23215
23216 // If we need the zero vector as an "ingredient" in the blend tree, add it
23217 // to the list of shuffles.
23218 if (UsesZeroVector)
23219 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23220 : DAG.getConstantFP(0.0, DL, VT));
23221
23222 // If we only have one shuffle, we're done.
23223 if (Shuffles.size() == 1)
23224 return Shuffles[0];
23225
23226 // Update the vector mask to point to the post-shuffle vectors.
23227 for (int &Vec : VectorMask)
23228 if (Vec == 0)
23229 Vec = Shuffles.size() - 1;
23230 else
23231 Vec = (Vec - 1) / 2;
23232
23233 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23234 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23235 // generate:
23236 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23237 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23238 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23239 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23240 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23241 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23242 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23243
23244 // Make sure the initial size of the shuffle list is even.
23245 if (Shuffles.size() % 2)
23246 Shuffles.push_back(DAG.getUNDEF(VT));
23247
23248 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23249 if (CurSize % 2) {
23250 Shuffles[CurSize] = DAG.getUNDEF(VT);
23251 CurSize++;
23252 }
23253 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23254 int Left = 2 * In;
23255 int Right = 2 * In + 1;
23256 SmallVector<int, 8> Mask(NumElems, -1);
23257 SDValue L = Shuffles[Left];
23258 ArrayRef<int> LMask;
23259 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23260 L.use_empty() && L.getOperand(1).isUndef() &&
23261 L.getOperand(0).getValueType() == L.getValueType();
23262 if (IsLeftShuffle) {
23263 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23264 L = L.getOperand(0);
23265 }
23266 SDValue R = Shuffles[Right];
23267 ArrayRef<int> RMask;
23268 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23269 R.use_empty() && R.getOperand(1).isUndef() &&
23270 R.getOperand(0).getValueType() == R.getValueType();
23271 if (IsRightShuffle) {
23272 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23273 R = R.getOperand(0);
23274 }
23275 for (unsigned I = 0; I != NumElems; ++I) {
23276 if (VectorMask[I] == Left) {
23277 Mask[I] = I;
23278 if (IsLeftShuffle)
23279 Mask[I] = LMask[I];
23280 VectorMask[I] = In;
23281 } else if (VectorMask[I] == Right) {
23282 Mask[I] = I + NumElems;
23283 if (IsRightShuffle)
23284 Mask[I] = RMask[I] + NumElems;
23285 VectorMask[I] = In;
23286 }
23287 }
23288
23289 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23290 }
23291 }
23292 return Shuffles[0];
23293}
23294
23295 // Try to turn a build vector of zero extends of extract vector elts into a
23296 // vector zero extend and possibly an extract subvector.
23297// TODO: Support sign extend?
23298// TODO: Allow undef elements?
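// E.g., assuming v2i16 is a legal type:
//   (v2i32 build_vector (zext (extract_elt (v8i16 X), 2)),
//                       (zext (extract_elt (v8i16 X), 3)))
//   --> (v2i32 zero_extend (v2i16 extract_subvector X, 2))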
23299SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23300 if (LegalOperations)
23301 return SDValue();
23302
23303 EVT VT = N->getValueType(0);
23304
23305 bool FoundZeroExtend = false;
23306 SDValue Op0 = N->getOperand(0);
23307 auto checkElem = [&](SDValue Op) -> int64_t {
23308 unsigned Opc = Op.getOpcode();
23309 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23310 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23311 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23312 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23313 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23314 return C->getZExtValue();
23315 return -1;
23316 };
23317
23318 // Make sure the first element matches
23319 // (zext (extract_vector_elt X, C))
23320 // Offset must be a constant multiple of the
23321 // known-minimum vector length of the result type.
23322 int64_t Offset = checkElem(Op0);
23323 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23324 return SDValue();
23325
23326 unsigned NumElems = N->getNumOperands();
23327 SDValue In = Op0.getOperand(0).getOperand(0);
23328 EVT InSVT = In.getValueType().getScalarType();
23329 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23330
23331 // Don't create an illegal input type after type legalization.
23332 if (LegalTypes && !TLI.isTypeLegal(InVT))
23333 return SDValue();
23334
23335 // Ensure all the elements come from the same vector and are adjacent.
23336 for (unsigned i = 1; i != NumElems; ++i) {
23337 if ((Offset + i) != checkElem(N->getOperand(i)))
23338 return SDValue();
23339 }
23340
23341 SDLoc DL(N);
23342 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23343 Op0.getOperand(0).getOperand(1));
23344 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23345 VT, In);
23346}
23347
23348 // If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
23349 // and all other elements are constant zeros, granularize the BUILD_VECTOR's
23350 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23351 // This pattern can appear during legalization.
23352 //
23353 // NOTE: This can be generalized to allow more than a single
23354 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
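// E.g., on a little-endian target, assuming the relevant types are legal
// (and after the intermediate trunc-of-bitcast folds away):
//   (v2i64 build_vector (zext i32 X to i64), (i64 0))
//   --> (v2i64 bitcast (v4i32 build_vector X, 0, 0, 0))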
23355SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23356 // Don't run this after legalization. Targets may have other preferences.
23357 if (Level >= AfterLegalizeDAG)
23358 return SDValue();
23359
23360 // FIXME: support big-endian.
23361 if (DAG.getDataLayout().isBigEndian())
23362 return SDValue();
23363
23364 EVT VT = N->getValueType(0);
23365 EVT OpVT = N->getOperand(0).getValueType();
23366 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23367
23368 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23369
23370 if (!TLI.isTypeLegal(OpIntVT) ||
23371 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23372 return SDValue();
23373
23374 unsigned EltBitwidth = VT.getScalarSizeInBits();
23375 // NOTE: the actual width of operands may be wider than that!
23376
23377 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23378 // active bits they all have? We'll want to truncate them all to that width.
23379 unsigned ActiveBits = 0;
23380 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23381 for (auto I : enumerate(N->ops())) {
23382 SDValue Op = I.value();
23383 // FIXME: support UNDEF elements?
23384 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23385 unsigned OpActiveBits =
23386 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23387 if (OpActiveBits == 0) {
23388 KnownZeroOps.setBit(I.index());
23389 continue;
23390 }
23391 // Profitability check: don't allow non-zero constant operands.
23392 return SDValue();
23393 }
23394 // Profitability check: there must only be a single non-zero operand,
23395 // and it must be the first operand of the BUILD_VECTOR.
23396 if (I.index() != 0)
23397 return SDValue();
23398 // The operand must be a zero-extension itself.
23399 // FIXME: this could be generalized to known leading zeros check.
23400 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23401 return SDValue();
23402 unsigned CurrActiveBits =
23403 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23404 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23405 ActiveBits = CurrActiveBits;
23406 // We want to at least halve the element size.
23407 if (2 * ActiveBits > EltBitwidth)
23408 return SDValue();
23409 }
23410
23411 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23412 if (ActiveBits == 0)
23413 return SDValue();
23414
23415 // We have EltBitwidth bits; the *minimal* chunk size is ActiveBits. Into
23416 // how many chunks can we split our element width?
23417 EVT NewScalarIntVT, NewIntVT;
23418 std::optional<unsigned> Factor;
23419 // We can split the element into at least two chunks, but not into more
23420 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23421 // for which the element width is a multiple of it,
23422 // and the resulting types/operations on that chunk width are legal.
23423 assert(2 * ActiveBits <= EltBitwidth &&
23424 "We know that half or less bits of the element are active.");
23425 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23426 if (EltBitwidth % Scale != 0)
23427 continue;
23428 unsigned ChunkBitwidth = EltBitwidth / Scale;
23429 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23430 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23431 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23432 Scale * N->getNumOperands());
23433 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23434 (LegalOperations &&
23435 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23436 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23437 continue;
23438 Factor = Scale;
23439 break;
23440 }
23441 if (!Factor)
23442 return SDValue();
23443
23444 SDLoc DL(N);
23445 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23446
23447 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23448 SmallVector<SDValue, 16> NewOps;
23449 NewOps.reserve(NewIntVT.getVectorNumElements());
23450 for (auto I : enumerate(N->ops())) {
23451 SDValue Op = I.value();
23452 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23453 unsigned SrcOpIdx = I.index();
23454 if (KnownZeroOps[SrcOpIdx]) {
23455 NewOps.append(*Factor, ZeroOp);
23456 continue;
23457 }
23458 Op = DAG.getBitcast(OpIntVT, Op);
23459 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23460 NewOps.emplace_back(Op);
23461 NewOps.append(*Factor - 1, ZeroOp);
23462 }
23463 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23464 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23465 NewBV = DAG.getBitcast(VT, NewBV);
23466 return NewBV;
23467}
23468
23469SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23470 EVT VT = N->getValueType(0);
23471
23472 // A vector built entirely of undefs is undef.
23473 if (ISD::allOperandsUndef(N))
23474 return DAG.getUNDEF(VT);
23475
23476 // If this is a splat of a bitcast from another vector, change to a
23477 // concat_vector.
23478 // For example:
23479 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23480 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23481 //
23482 // If X is a build_vector itself, the concat can become a larger build_vector.
23483 // TODO: Maybe this is useful for non-splat too?
23484 if (!LegalOperations) {
23485 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23486 // Only change build_vector to a concat_vector if the splat value type is
23487 // same as the vector element type.
23488 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23489 Splat = peekThroughBitcasts(Splat);
23490 EVT SrcVT = Splat.getValueType();
23491 if (SrcVT.isVector()) {
23492 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23493 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23494 SrcVT.getVectorElementType(), NumElts);
23495 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23496 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23497 SDValue Concat =
23498 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23499 return DAG.getBitcast(VT, Concat);
23500 }
23501 }
23502 }
23503 }
23504
23505 // Check if we can express BUILD VECTOR via subvector extract.
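// E.g.:
//   (v2i32 build_vector (extract_elt (v8i32 X), 4), (extract_elt X, 5))
//   --> (v2i32 extract_subvector X, 4)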
23506 if (!LegalTypes && (N->getNumOperands() > 1)) {
23507 SDValue Op0 = N->getOperand(0);
23508 auto checkElem = [&](SDValue Op) -> uint64_t {
23509 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23510 (Op0.getOperand(0) == Op.getOperand(0)))
23511 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23512 return CNode->getZExtValue();
23513 return -1;
23514 };
23515
23516 int Offset = checkElem(Op0);
23517 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23518 if (Offset + i != checkElem(N->getOperand(i))) {
23519 Offset = -1;
23520 break;
23521 }
23522 }
23523
23524 if ((Offset == 0) &&
23525 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23526 return Op0.getOperand(0);
23527 if ((Offset != -1) &&
23528 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23529 0)) // IDX must be multiple of output size.
23530 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23531 Op0.getOperand(0), Op0.getOperand(1));
23532 }
23533
23534 if (SDValue V = convertBuildVecZextToZext(N))
23535 return V;
23536
23537 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23538 return V;
23539
23540 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23541 return V;
23542
23543 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23544 return V;
23545
23546 if (SDValue V = reduceBuildVecToShuffle(N))
23547 return V;
23548
23549 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23550 // Do this late as some of the above may replace the splat.
23551 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23552 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23553 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23554 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23555 }
23556
23557 return SDValue();
23558}
23559
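// Fold a CONCAT_VECTORS whose operands are all either UNDEF or scalars
// bitcast to vectors into one BUILD_VECTOR of those scalars, bitcast to the
// result type.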
23560 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23561 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23562 EVT OpVT = N->getOperand(0).getValueType();
23563
23564 // If the operands are legal vectors, leave them alone.
23565 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23566 return SDValue();
23567
23568 SDLoc DL(N);
23569 EVT VT = N->getValueType(0);
23570 SmallVector<SDValue, 8> Ops;
23571 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23572
23573 // Keep track of what we encounter.
23574 bool AnyInteger = false;
23575 bool AnyFP = false;
23576 for (const SDValue &Op : N->ops()) {
23577 if (ISD::BITCAST == Op.getOpcode() &&
23578 !Op.getOperand(0).getValueType().isVector())
23579 Ops.push_back(Op.getOperand(0));
23580 else if (ISD::UNDEF == Op.getOpcode())
23581 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23582 else
23583 return SDValue();
23584
23585 // Note whether we encounter an integer or floating point scalar.
23586 // If it's neither, bail out, it could be something weird like x86mmx.
23587 EVT LastOpVT = Ops.back().getValueType();
23588 if (LastOpVT.isFloatingPoint())
23589 AnyFP = true;
23590 else if (LastOpVT.isInteger())
23591 AnyInteger = true;
23592 else
23593 return SDValue();
23594 }
23595
23596 // If any of the operands is a floating point scalar bitcast to a vector,
23597 // use floating point types throughout, and bitcast everything.
23598 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23599 if (AnyFP) {
23600 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23601 if (AnyInteger) {
23602 for (SDValue &Op : Ops) {
23603 if (Op.getValueType() == SVT)
23604 continue;
23605 if (Op.isUndef())
23606 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23607 else
23608 Op = DAG.getBitcast(SVT, Op);
23609 }
23610 }
23611 }
23612
23613 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23614 VT.getSizeInBits() / SVT.getSizeInBits());
23615 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23616}
23617
23618// Attempt to merge nested concat_vectors/undefs.
23619// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23620// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23621 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23622 SelectionDAG &DAG) {
23623 EVT VT = N->getValueType(0);
23624
23625 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23626 EVT SubVT;
23627 SDValue FirstConcat;
23628 for (const SDValue &Op : N->ops()) {
23629 if (Op.isUndef())
23630 continue;
23631 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23632 return SDValue();
23633 if (!FirstConcat) {
23634 SubVT = Op.getOperand(0).getValueType();
23635 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23636 return SDValue();
23637 FirstConcat = Op;
23638 continue;
23639 }
23640 if (SubVT != Op.getOperand(0).getValueType())
23641 return SDValue();
23642 }
23643 assert(FirstConcat && "Concat of all-undefs found");
23644
23645 SmallVector<SDValue> ConcatOps;
23646 for (const SDValue &Op : N->ops()) {
23647 if (Op.isUndef()) {
23648 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23649 continue;
23650 }
23651 ConcatOps.append(Op->op_begin(), Op->op_end());
23652 }
23653 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23654}
23655
23656// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23657// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23658// most two distinct vectors the same size as the result, attempt to turn this
23659// into a legal shuffle.
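// E.g., if the shuffle is legal for the target:
//   concat_vectors (extract_subvector (v4i32 X), 2), (extract_subvector X, 0)
//   --> vector_shuffle<2,3,0,1> X, undef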
23660 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23661 EVT VT = N->getValueType(0);
23662 EVT OpVT = N->getOperand(0).getValueType();
23663
23664 // We currently can't generate an appropriate shuffle for a scalable vector.
23665 if (VT.isScalableVector())
23666 return SDValue();
23667
23668 int NumElts = VT.getVectorNumElements();
23669 int NumOpElts = OpVT.getVectorNumElements();
23670
23671 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23672 SmallVector<int, 8> Mask;
23673
23674 for (SDValue Op : N->ops()) {
23675 Op = peekThroughBitcasts(Op);
23676
23677 // UNDEF nodes convert to UNDEF shuffle mask values.
23678 if (Op.isUndef()) {
23679 Mask.append((unsigned)NumOpElts, -1);
23680 continue;
23681 }
23682
23683 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23684 return SDValue();
23685
23686 // What vector are we extracting the subvector from and at what index?
23687 SDValue ExtVec = Op.getOperand(0);
23688 int ExtIdx = Op.getConstantOperandVal(1);
23689
23690 // We want the EVT of the original extraction to correctly scale the
23691 // extraction index.
23692 EVT ExtVT = ExtVec.getValueType();
23693 ExtVec = peekThroughBitcasts(ExtVec);
23694
23695 // UNDEF nodes convert to UNDEF shuffle mask values.
23696 if (ExtVec.isUndef()) {
23697 Mask.append((unsigned)NumOpElts, -1);
23698 continue;
23699 }
23700
23701 // Ensure that we are extracting a subvector from a vector the same
23702 // size as the result.
23703 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23704 return SDValue();
23705
23706 // Scale the subvector index to account for any bitcast.
23707 int NumExtElts = ExtVT.getVectorNumElements();
23708 if (0 == (NumExtElts % NumElts))
23709 ExtIdx /= (NumExtElts / NumElts);
23710 else if (0 == (NumElts % NumExtElts))
23711 ExtIdx *= (NumElts / NumExtElts);
23712 else
23713 return SDValue();
23714
23715 // At most we can reference 2 inputs in the final shuffle.
23716 if (SV0.isUndef() || SV0 == ExtVec) {
23717 SV0 = ExtVec;
23718 for (int i = 0; i != NumOpElts; ++i)
23719 Mask.push_back(i + ExtIdx);
23720 } else if (SV1.isUndef() || SV1 == ExtVec) {
23721 SV1 = ExtVec;
23722 for (int i = 0; i != NumOpElts; ++i)
23723 Mask.push_back(i + ExtIdx + NumElts);
23724 } else {
23725 return SDValue();
23726 }
23727 }
23728
23729 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23730 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23731 DAG.getBitcast(VT, SV1), Mask, DAG);
23732}
23733
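// Fold concat_vectors of int<->fp casts into a cast of a wider concat:
//   concat (cast X), (cast Y), ... --> cast (concat X, Y, ...)
// when the wider cast and types are supported by the target.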
23734 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23735 unsigned CastOpcode = N->getOperand(0).getOpcode();
23736 switch (CastOpcode) {
23737 case ISD::SINT_TO_FP:
23738 case ISD::UINT_TO_FP:
23739 case ISD::FP_TO_SINT:
23740 case ISD::FP_TO_UINT:
23741 // TODO: Allow more opcodes?
23742 // case ISD::BITCAST:
23743 // case ISD::TRUNCATE:
23744 // case ISD::ZERO_EXTEND:
23745 // case ISD::SIGN_EXTEND:
23746 // case ISD::FP_EXTEND:
23747 break;
23748 default:
23749 return SDValue();
23750 }
23751
23752 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23753 if (!SrcVT.isVector())
23754 return SDValue();
23755
23756 // All operands of the concat must be the same kind of cast from the same
23757 // source type.
23758 SmallVector<SDValue, 4> SrcOps;
23759 for (SDValue Op : N->ops()) {
23760 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23761 Op.getOperand(0).getValueType() != SrcVT)
23762 return SDValue();
23763 SrcOps.push_back(Op.getOperand(0));
23764 }
23765
23766 // The wider cast must be supported by the target. This is unusual because
23767 // the operation support type parameter depends on the opcode. In addition,
23768 // check the other type in the cast to make sure this is really legal.
23769 EVT VT = N->getValueType(0);
23770 EVT SrcEltVT = SrcVT.getVectorElementType();
23771 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23772 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23773 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23774 switch (CastOpcode) {
23775 case ISD::SINT_TO_FP:
23776 case ISD::UINT_TO_FP:
23777 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23778 !TLI.isTypeLegal(VT))
23779 return SDValue();
23780 break;
23781 case ISD::FP_TO_SINT:
23782 case ISD::FP_TO_UINT:
23783 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23784 !TLI.isTypeLegal(ConcatSrcVT))
23785 return SDValue();
23786 break;
23787 default:
23788 llvm_unreachable("Unexpected cast opcode");
23789 }
23790
23791 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23792 SDLoc DL(N);
23793 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23794 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23795}
23796
23797// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23798// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23799 // to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
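// E.g., with unary shuffles and legal mask/types:
//   concat_vectors (vector_shuffle<1,0> X, undef), X
//   --> vector_shuffle<1,0,0,1> (concat_vectors X, undef), undef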
23800 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23801 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23802 bool LegalOperations) {
23803 EVT VT = N->getValueType(0);
23804 EVT OpVT = N->getOperand(0).getValueType();
23805 if (VT.isScalableVector())
23806 return SDValue();
23807
23808 // For now, only allow simple 2-operand concatenations.
23809 if (N->getNumOperands() != 2)
23810 return SDValue();
23811
23812 // Don't create illegal types/shuffles when not allowed to.
23813 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23814 (LegalOperations &&
23815 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23816 return SDValue();
23817
23818 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23819 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23820 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23821 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23822 // (4) and for now, the SHUFFLE_VECTOR must be unary.
23823 ShuffleVectorSDNode *SVN = nullptr;
23824 for (SDValue Op : N->ops()) {
23825 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23826 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23827 all_of(N->ops(), [CurSVN](SDValue Op) {
23828 // FIXME: can we allow UNDEF operands?
23829 return !Op.isUndef() &&
23830 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23831 })) {
23832 SVN = CurSVN;
23833 break;
23834 }
23835 }
23836 if (!SVN)
23837 return SDValue();
23838
23839 // We are going to pad the shuffle operands, so any index that was picking
23840 // from the second operand must be adjusted.
23841 SmallVector<int, 16> AdjustedMask;
23842 AdjustedMask.reserve(SVN->getMask().size());
23843 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23844 append_range(AdjustedMask, SVN->getMask());
23845
23846 // Identity masks for the operands of the (padded) shuffle.
23847 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23848 MutableArrayRef<int> FirstShufOpIdentityMask =
23849 MutableArrayRef<int>(IdentityMask)
23850 .take_front(OpVT.getVectorNumElements());
23851 MutableArrayRef<int> SecondShufOpIdentityMask =
23852 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23853 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23854 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23855 VT.getVectorNumElements());
23856
23857 // New combined shuffle mask.
23858 SmallVector<int, 32> Mask;
23859 Mask.reserve(VT.getVectorNumElements());
23860 for (SDValue Op : N->ops()) {
23861 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23862 if (Op.getNode() == SVN) {
23863 append_range(Mask, AdjustedMask);
23864 continue;
23865 }
23866 if (Op == SVN->getOperand(0)) {
23867 append_range(Mask, FirstShufOpIdentityMask);
23868 continue;
23869 }
23870 if (Op == SVN->getOperand(1)) {
23871 append_range(Mask, SecondShufOpIdentityMask);
23872 continue;
23873 }
23874 llvm_unreachable("Unexpected operand!");
23875 }
23876
23877 // Don't create illegal shuffle masks.
23878 if (!TLI.isShuffleMaskLegal(Mask, VT))
23879 return SDValue();
23880
23881 // Pad the shuffle operands with UNDEF.
23882 SDLoc dl(N);
23883 std::array<SDValue, 2> ShufOps;
23884 for (auto I : zip(SVN->ops(), ShufOps)) {
23885 SDValue ShufOp = std::get<0>(I);
23886 SDValue &NewShufOp = std::get<1>(I);
23887 if (ShufOp.isUndef())
23888 NewShufOp = DAG.getUNDEF(VT);
23889 else {
23890 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23891 DAG.getUNDEF(OpVT));
23892 ShufOpParts[0] = ShufOp;
23893 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23894 }
23895 }
23896 // Finally, create the new wide shuffle.
23897 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23898}
23899
23900SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23901 // If we only have one input vector, we don't need to do any concatenation.
23902 if (N->getNumOperands() == 1)
23903 return N->getOperand(0);
23904
23905 // Check if all of the operands are undefs.
23906 EVT VT = N->getValueType(0);
23907 if (ISD::allOperandsUndef(N))
23908 return DAG.getUNDEF(VT);
23909
23910 // Optimize concat_vectors where all but the first of the vectors are undef.
23911 if (all_of(drop_begin(N->ops()),
23912 [](const SDValue &Op) { return Op.isUndef(); })) {
23913 SDValue In = N->getOperand(0);
23914 assert(In.getValueType().isVector() && "Must concat vectors");
23915
23916 // If the input is a concat_vectors, just make a larger concat by padding
23917 // with smaller undefs.
23918 //
23919 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
23920 // here could cause an infinite loop. That legalizing happens when LegalDAG
23921 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
23922 // scalable.
23923 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23924 !(LegalDAG && In.getValueType().isScalableVector())) {
23925 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23926 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23927 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23928 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23929 }
23930
23931 SDValue Scalar = peekThroughOneUseBitcasts(In);
23932
23933 // concat_vectors(scalar_to_vector(scalar), undef) ->
23934 // scalar_to_vector(scalar)
23935 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23936 Scalar.hasOneUse()) {
23937 EVT SVT = Scalar.getValueType().getVectorElementType();
23938 if (SVT == Scalar.getOperand(0).getValueType())
23939 Scalar = Scalar.getOperand(0);
23940 }
23941
23942 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23943 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
23944 // If the bitcast type isn't legal, it might be a trunc of a legal type;
23945 // look through the trunc so we can still do the transform:
23946 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23947 if (Scalar->getOpcode() == ISD::TRUNCATE &&
23948 !TLI.isTypeLegal(Scalar.getValueType()) &&
23949 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23950 Scalar = Scalar->getOperand(0);
23951
23952 EVT SclTy = Scalar.getValueType();
23953
23954 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23955 return SDValue();
23956
23957 // Bail out if the vector size is not a multiple of the scalar size.
23958 if (VT.getSizeInBits() % SclTy.getSizeInBits())
23959 return SDValue();
23960
23961 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23962 if (VNTNumElms < 2)
23963 return SDValue();
23964
23965 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23966 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23967 return SDValue();
23968
23969 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23970 return DAG.getBitcast(VT, Res);
23971 }
23972 }
23973
23974 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
23975 // We have already tested above for an UNDEF only concatenation.
23976 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23977 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
23978 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23979 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23980 };
23981 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23982 SmallVector<SDValue, 8> Opnds;
23983 EVT SVT = VT.getScalarType();
23984
23985 EVT MinVT = SVT;
23986 if (!SVT.isFloatingPoint()) {
23987 // If the BUILD_VECTORs are built from integers, they may have different
23988 // operand types. Get the smallest type and truncate all operands to it.
23989 bool FoundMinVT = false;
23990 for (const SDValue &Op : N->ops())
23991 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23992 EVT OpSVT = Op.getOperand(0).getValueType();
23993 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
23994 FoundMinVT = true;
23995 }
23996 assert(FoundMinVT && "Concat vector type mismatch");
23997 }
23998
23999 for (const SDValue &Op : N->ops()) {
24000 EVT OpVT = Op.getValueType();
24001 unsigned NumElts = OpVT.getVectorNumElements();
24002
24003 if (ISD::UNDEF == Op.getOpcode())
24004 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24005
24006 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24007 if (SVT.isFloatingPoint()) {
24008 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24009 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24010 } else {
24011 for (unsigned i = 0; i != NumElts; ++i)
24012 Opnds.push_back(
24013 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24014 }
24015 }
24016 }
24017
24018 assert(VT.getVectorNumElements() == Opnds.size() &&
24019 "Concat vector type mismatch");
24020 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24021 }
24022
24023 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24024 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24025 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24026 return V;
24027
24028 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24029 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24030 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24031 return V;
24032
24033 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24034 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24035 return V;
24036 }
24037
24038 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24039 return V;
24040
24041 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24042 N, DAG, TLI, LegalTypes, LegalOperations))
24043 return V;
24044
24045 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24046 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24047 // operands and look for identity EXTRACT_SUBVECTOR operations that place the
24048 // incoming vectors at the exact same location in a single source vector.
24049 //
24050 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
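// For illustration (fixed-width case, %X is an arbitrary v8i32 value):
//   (v8i32 concat_vectors (v4i32 extract_subvector %X, 0),
//                         (v4i32 extract_subvector %X, 4))
// reads all of %X at its original positions, so the concat folds to %X itself.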
24051 SDValue SingleSource = SDValue();
24052 unsigned PartNumElem =
24053 N->getOperand(0).getValueType().getVectorMinNumElements();
24054
24055 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24056 SDValue Op = N->getOperand(i);
24057
24058 if (Op.isUndef())
24059 continue;
24060
24061 // Check if this is the identity extract:
24062 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24063 return SDValue();
24064
24065 // Find the single incoming vector for the extract_subvector.
24066 if (SingleSource.getNode()) {
24067 if (Op.getOperand(0) != SingleSource)
24068 return SDValue();
24069 } else {
24070 SingleSource = Op.getOperand(0);
24071
24072 // Check the source type is the same as the type of the result.
24073 // If not, this concat may extend the vector, so we can not
24074 // optimize it away.
24075 if (SingleSource.getValueType() != N->getValueType(0))
24076 return SDValue();
24077 }
24078
24079 // Check that we are reading from the identity index.
24080 unsigned IdentityIndex = i * PartNumElem;
24081 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24082 return SDValue();
24083 }
24084
24085 if (SingleSource.getNode())
24086 return SingleSource;
24087
24088 return SDValue();
24089}
24090
24091// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24092// if the subvector can be sourced for free.
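// For illustration (A and B are arbitrary v4i32 values): with
// V = (v8i32 concat_vectors A, B), Index = 4 and SubVT = v4i32, the
// CONCAT_VECTORS case below returns B, since 4 / 4 selects concat operand 1.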
24093 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24094 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24095 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24096 return V.getOperand(1);
24097 }
24098 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24099 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24100 V.getOperand(0).getValueType() == SubVT &&
24101 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24102 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24103 return V.getOperand(SubIdx);
24104 }
24105 return SDValue();
24106}
24107
24108 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24109 SelectionDAG &DAG,
24110 bool LegalOperations) {
24111 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24112 SDValue BinOp = Extract->getOperand(0);
24113 unsigned BinOpcode = BinOp.getOpcode();
24114 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24115 return SDValue();
24116
24117 EVT VecVT = BinOp.getValueType();
24118 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24119 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24120 return SDValue();
24121
24122 SDValue Index = Extract->getOperand(1);
24123 EVT SubVT = Extract->getValueType(0);
24124 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24125 return SDValue();
24126
24127 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24128 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24129
24130 // TODO: We could handle the case where only 1 operand is being inserted by
24131 // creating an extract of the other operand, but that requires checking
24132 // number of uses and/or costs.
24133 if (!Sub0 || !Sub1)
24134 return SDValue();
24135
24136 // We are inserting both operands of the wide binop only to extract back
24137 // to the narrow vector size. Eliminate all of the insert/extract:
24138 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24139 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24140 BinOp->getFlags());
24141}
24142
24143/// If we are extracting a subvector produced by a wide binary operator try
24144/// to use a narrow binary operator and/or avoid concatenation and extraction.
24145 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24146 bool LegalOperations) {
24147 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24148 // some of these bailouts with other transforms.
24149
24150 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24151 return V;
24152
24153 // The extract index must be a constant, so we can map it to a concat operand.
24154 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24155 if (!ExtractIndexC)
24156 return SDValue();
24157
24158 // We are looking for an optionally bitcasted wide vector binary operator
24159 // feeding an extract subvector.
24160 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24161 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24162 unsigned BOpcode = BinOp.getOpcode();
24163 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24164 return SDValue();
24165
24166 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24167 // reduced to the unary fneg when it is visited, and we probably want to deal
24168 // with fneg in a target-specific way.
24169 if (BOpcode == ISD::FSUB) {
24170 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24171 if (C && C->getValueAPF().isNegZero())
24172 return SDValue();
24173 }
24174
24175 // The binop must be a vector type, so we can extract some fraction of it.
24176 EVT WideBVT = BinOp.getValueType();
24177 // The optimisations below currently assume we are dealing with fixed length
24178 // vectors. It is possible to add support for scalable vectors, but at the
24179 // moment we've done no analysis to prove whether they are profitable or not.
24180 if (!WideBVT.isFixedLengthVector())
24181 return SDValue();
24182
24183 EVT VT = Extract->getValueType(0);
24184 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24185 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24186 "Extract index is not a multiple of the vector length.");
24187
24188 // Bail out if this is not a proper multiple width extraction.
24189 unsigned WideWidth = WideBVT.getSizeInBits();
24190 unsigned NarrowWidth = VT.getSizeInBits();
24191 if (WideWidth % NarrowWidth != 0)
24192 return SDValue();
24193
24194 // Bail out if we are extracting a fraction of a single operation. This can
24195 // occur because we potentially looked through a bitcast of the binop.
24196 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24197 unsigned WideNumElts = WideBVT.getVectorNumElements();
24198 if (WideNumElts % NarrowingRatio != 0)
24199 return SDValue();
24200
24201 // Bail out if the target does not support a narrower version of the binop.
24202 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24203 WideNumElts / NarrowingRatio);
24204 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24205 LegalOperations))
24206 return SDValue();
24207
24208 // If extraction is cheap, we don't need to look at the binop operands
24209 // for concat ops. The narrow binop alone makes this transform profitable.
24210 // We can't just reuse the original extract index operand because we may have
24211 // bitcasted.
24212 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24213 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24214 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24215 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24216 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24217 SDLoc DL(Extract);
24218 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24219 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24220 BinOp.getOperand(0), NewExtIndex);
24221 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24222 BinOp.getOperand(1), NewExtIndex);
24223 SDValue NarrowBinOp =
24224 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24225 return DAG.getBitcast(VT, NarrowBinOp);
24226 }
24227
24228 // Only handle the case where we are doubling and then halving. A larger ratio
24229 // may require more than two narrow binops to replace the wide binop.
24230 if (NarrowingRatio != 2)
24231 return SDValue();
24232
24233 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24234 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24235 // flavors, but no other 256-bit integer support. This could be extended to
24236 // handle any binop, but that may require fixing/adding other folds to avoid
24237 // codegen regressions.
24238 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24239 return SDValue();
24240
24241 // We need at least one concatenation operation of a binop operand to make
24242 // this transform worthwhile. The concat must double the input vector sizes.
24243 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24244 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24245 return V.getOperand(ConcatOpNum);
24246 return SDValue();
24247 };
24248 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24249 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24250
24251 if (SubVecL || SubVecR) {
24252 // If a binop operand was not the result of a concat, we must extract a
24253 // half-sized operand for our new narrow binop:
24254 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24255 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24256 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24257 SDLoc DL(Extract);
24258 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24259 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24260 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24261 BinOp.getOperand(0), IndexC);
24262
24263 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24264 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24265 BinOp.getOperand(1), IndexC);
24266
24267 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24268 return DAG.getBitcast(VT, NarrowBinOp);
24269 }
24270
24271 return SDValue();
24272}
24273
24274/// If we are extracting a subvector from a wide vector load, convert to a
24275/// narrow load to eliminate the extraction:
24276/// (extract_subvector (load wide vector)) --> (load narrow vector)
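/// For illustration (little-endian, %ptr is an arbitrary base pointer):
///   (v2f64 extract_subvector (v8f64 load %ptr), 4)
/// can become (v2f64 load %ptr+32), since the extracted elements start
/// 32 bytes past the base of the wide load.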
24277 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24278 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24279 if (DAG.getDataLayout().isBigEndian())
24280 return SDValue();
24281
24282 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24283 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24284 return SDValue();
24285
24286 // Allow targets to opt-out.
24287 EVT VT = Extract->getValueType(0);
24288
24289 // We can only create byte sized loads.
24290 if (!VT.isByteSized())
24291 return SDValue();
24292
24293 unsigned Index = Extract->getConstantOperandVal(1);
24294 unsigned NumElts = VT.getVectorMinNumElements();
24295 // A fixed length vector being extracted from a scalable vector
24296 // may not be any *smaller* than the scalable one.
24297 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24298 return SDValue();
24299
24300 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24301 // multiple of the minimum number of elements in the result type.
24302 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24303 "multiple of the result's element count");
24304
24305 // It's fine to use TypeSize here as we know the offset will not be negative.
24306 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24307
24308 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24309 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24310 return SDValue();
24311
24312 // The narrow load will be offset from the base address of the old load if
24313 // we are extracting from something besides index 0 (little-endian).
24314 SDLoc DL(Extract);
24315
24316 // TODO: Use "BaseIndexOffset" to make this more effective.
24317 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24318
24319 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24320 MachineFunction &MF = DAG.getMachineFunction();
24321 MachineMemOperand *MMO;
24322 if (Offset.isScalable()) {
24323 MachinePointerInfo MPI =
24324 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24325 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24326 } else
24327 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24328 StoreSize);
24329
24330 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24331 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24332 return NewLd;
24333}
24334
24335/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24336/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24337/// EXTRACT_SUBVECTOR(Op?, ?),
24338 /// Mask')
24339/// iff it is legal and profitable to do so. Notably, the trimmed mask
24340/// (containing only the elements that are extracted)
24341/// must reference at most two subvectors.
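/// For illustration (X and Y are arbitrary v8i32 operands):
///   (v4i32 extract_subvector (v8i32 vector_shuffle X, Y, <u,u,u,u,2,3,10,11>), 4)
/// demands only the low v4i32 subvectors of X and Y, so it can become
///   (v4i32 vector_shuffle (extract_subvector X, 0),
///                         (extract_subvector Y, 0), <2,3,6,7>)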
24342 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24343 SelectionDAG &DAG,
24344 const TargetLowering &TLI,
24345 bool LegalOperations) {
24346 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24347 "Must only be called on EXTRACT_SUBVECTOR's");
24348
24349 SDValue N0 = N->getOperand(0);
24350
24351 // Only deal with non-scalable vectors.
24352 EVT NarrowVT = N->getValueType(0);
24353 EVT WideVT = N0.getValueType();
24354 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24355 return SDValue();
24356
24357 // The operand must be a shufflevector.
24358 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24359 if (!WideShuffleVector)
24360 return SDValue();
24361
24362 // The old shuffle needs to go away.
24363 if (!WideShuffleVector->hasOneUse())
24364 return SDValue();
24365
24366 // And the narrow shufflevector that we'll form must be legal.
24367 if (LegalOperations &&
24368 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, NarrowVT))
24369 return SDValue();
24370
24371 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24372 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24373 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24374 "Extract index is not a multiple of the output vector length.");
24375
24376 int WideNumElts = WideVT.getVectorNumElements();
24377
24378 SmallVector<int, 16> NewMask;
24379 NewMask.reserve(NumEltsExtracted);
24380 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24381 DemandedSubvectors;
24382
24383 // Try to decode the wide mask into narrow mask from at most two subvectors.
24384 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24385 NumEltsExtracted)) {
24386 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24387 "Out-of-bounds shuffle mask?");
24388
24389 if (M < 0) {
24390 // Does not depend on operands, does not require adjustment.
24391 NewMask.emplace_back(M);
24392 continue;
24393 }
24394
24395 // From which operand of the shuffle does this shuffle mask element pick?
24396 int WideShufOpIdx = M / WideNumElts;
24397 // Which element of that operand is picked?
24398 int OpEltIdx = M % WideNumElts;
24399
24400 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24401 "Shuffle mask vector decomposition failure.");
24402
24403 // And which NumEltsExtracted-sized subvector of that operand is that?
24404 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24405 // And which element within that subvector of that operand is that?
24406 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24407
24408 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24409 "Shuffle mask subvector decomposition failure.");
24410
24411 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24412 WideShufOpIdx * WideNumElts) == M &&
24413 "Shuffle mask full decomposition failure.");
24414
24415 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24416
24417 if (Op.isUndef()) {
24418 // Picking from an undef operand. Let's adjust mask instead.
24419 NewMask.emplace_back(-1);
24420 continue;
24421 }
24422
24423 const std::pair<SDValue, int> DemandedSubvector =
24424 std::make_pair(Op, OpSubvecIdx);
24425
24426 if (DemandedSubvectors.insert(DemandedSubvector)) {
24427 if (DemandedSubvectors.size() > 2)
24428 return SDValue(); // We can't handle more than two subvectors.
24429 // How many elements into the WideVT does this subvector start?
24430 int Index = NumEltsExtracted * OpSubvecIdx;
24431 // Bail out if the extraction isn't going to be cheap.
24432 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24433 return SDValue();
24434 }
24435
24436 // Ok, but from which operand of the new shuffle will this element pick?
24437 int NewOpIdx =
24438 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24439 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24440
24441 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24442 NewMask.emplace_back(AdjM);
24443 }
24444 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24445 assert(DemandedSubvectors.size() <= 2 &&
24446 "Should have ended up demanding at most two subvectors.");
24447
24448 // Did we discover that the shuffle does not actually depend on operands?
24449 if (DemandedSubvectors.empty())
24450 return DAG.getUNDEF(NarrowVT);
24451
24452 // Profitability check: only deal with extractions from the first subvector
24453 // unless the mask becomes an identity mask.
24454 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24455 any_of(NewMask, [](int M) { return M < 0; }))
24456 for (auto &DemandedSubvector : DemandedSubvectors)
24457 if (DemandedSubvector.second != 0)
24458 return SDValue();
24459
24460 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24461 // operand[s]/index[es], so there is no point in checking its legality.
24462
24463 // Do not turn a legal shuffle into an illegal one.
24464 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24465 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24466 return SDValue();
24467
24468 SDLoc DL(N);
24469
24470 SmallVector<SDValue, 2> NewOps;
24471 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24472 &DemandedSubvector : DemandedSubvectors) {
24473 // How many elements into the WideVT does this subvector start?
24474 int Index = NumEltsExtracted * DemandedSubvector.second;
24475 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24476 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24477 DemandedSubvector.first, IndexC));
24478 }
24479 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24480 "Should end up with either one or two ops");
24481
24482 // If we ended up with only one operand, pad with an undef.
24483 if (NewOps.size() == 1)
24484 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24485
24486 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24487}
24488
24489SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24490 EVT NVT = N->getValueType(0);
24491 SDValue V = N->getOperand(0);
24492 uint64_t ExtIdx = N->getConstantOperandVal(1);
24493 SDLoc DL(N);
24494
24495 // Extract from UNDEF is UNDEF.
24496 if (V.isUndef())
24497 return DAG.getUNDEF(NVT);
24498
24499 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24500 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24501 return NarrowLoad;
24502
24503 // Combine an extract of an extract into a single extract_subvector.
24504 // ext (ext X, C), 0 --> ext X, C
24505 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24506 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24507 V.getConstantOperandVal(1)) &&
24508 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24509 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24510 V.getOperand(1));
24511 }
24512 }
24513
24514 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24515 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24516 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24517 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24518 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24519
24520 // extract_subvector(insert_subvector(x,y,c1),c2)
24521 // --> extract_subvector(y,c2-c1)
24522 // iff we're just extracting from the inserted subvector.
24523 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24524 SDValue InsSub = V.getOperand(1);
24525 EVT InsSubVT = InsSub.getValueType();
24526 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24527 unsigned InsIdx = V.getConstantOperandVal(2);
24528 unsigned NumSubElts = NVT.getVectorMinNumElements();
24529 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24530 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24531 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24532 V.getValueType().isFixedLengthVector())
24533 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24534 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24535 }
24536
24537 // Try to move vector bitcast after extract_subv by scaling extraction index:
24538 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
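// For illustration (X is an arbitrary v8i32 value):
//   (v2i64 extract_subvector (v4i64 bitcast X), 2)
//     --> (v2i64 bitcast (v4i32 extract_subvector X, 4))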
24539 if (V.getOpcode() == ISD::BITCAST &&
24540 V.getOperand(0).getValueType().isVector() &&
24541 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24542 SDValue SrcOp = V.getOperand(0);
24543 EVT SrcVT = SrcOp.getValueType();
24544 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24545 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24546 if ((SrcNumElts % DestNumElts) == 0) {
24547 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24548 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24549 EVT NewExtVT =
24550 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24551 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24552 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24553 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24554 V.getOperand(0), NewIndex);
24555 return DAG.getBitcast(NVT, NewExtract);
24556 }
24557 }
24558 if ((DestNumElts % SrcNumElts) == 0) {
24559 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24560 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24561 ElementCount NewExtEC =
24562 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24563 EVT ScalarVT = SrcVT.getScalarType();
24564 if ((ExtIdx % DestSrcRatio) == 0) {
24565 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24566 EVT NewExtVT =
24567 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24568 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24569 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24570 SDValue NewExtract =
24571 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24572 V.getOperand(0), NewIndex);
24573 return DAG.getBitcast(NVT, NewExtract);
24574 }
24575 if (NewExtEC.isScalar() &&
24576 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24577 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24578 SDValue NewExtract =
24579 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24580 V.getOperand(0), NewIndex);
24581 return DAG.getBitcast(NVT, NewExtract);
24582 }
24583 }
24584 }
24585 }
24586 }
24587
24588 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24589 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24590 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24591 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24592 "Concat and extract subvector do not change element type");
24593 assert((ExtIdx % ExtNumElts) == 0 &&
24594 "Extract index is not a multiple of the input vector length.");
24595
24596 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24597 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24598
24599 // If the concatenated source types match this extract, it's a direct
24600 // simplification:
24601 // extract_subvec (concat V1, V2, ...), i --> Vi
24602 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24603 return V.getOperand(ConcatOpIdx);
24604
24605 // If the concatenated source vectors are a multiple length of this extract,
24606 // then extract a fraction of one of those source vectors directly from a
24607 // concat operand. Example:
24608 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24609 // v2i8 extract_subvec v8i8 Y, 6
24610 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24611 ConcatSrcNumElts % ExtNumElts == 0) {
24612 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24613 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24614 "Trying to extract from >1 concat operand?");
24615 assert(NewExtIdx % ExtNumElts == 0 &&
24616 "Extract index is not a multiple of the input vector length.");
24617 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24618 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24619 V.getOperand(ConcatOpIdx), NewIndexC);
24620 }
24621 }
24622
24623 if (SDValue V =
24624 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24625 return V;
24626
24627 V = peekThroughBitcasts(V);
24628
24629 // If the input is a build vector, try to make a smaller build vector.
24630 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24631 EVT InVT = V.getValueType();
24632 unsigned ExtractSize = NVT.getSizeInBits();
24633 unsigned EltSize = InVT.getScalarSizeInBits();
24634 // Only do this if we won't split any elements.
24635 if (ExtractSize % EltSize == 0) {
24636 unsigned NumElems = ExtractSize / EltSize;
24637 EVT EltVT = InVT.getVectorElementType();
24638 EVT ExtractVT =
24639 NumElems == 1 ? EltVT
24640 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24641 if ((Level < AfterLegalizeDAG ||
24642 (NumElems == 1 ||
24643 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24644 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24645 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24646
24647 if (NumElems == 1) {
24648 SDValue Src = V->getOperand(IdxVal);
24649 if (EltVT != Src.getValueType())
24650 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24651 return DAG.getBitcast(NVT, Src);
24652 }
24653
24654 // Extract the pieces from the original build_vector.
24655 SDValue BuildVec =
24656 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24657 return DAG.getBitcast(NVT, BuildVec);
24658 }
24659 }
24660 }
24661
24662 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24663 // Handle only the simple case where the vector being inserted and the
24664 // vector being extracted are of the same size.
24665 EVT SmallVT = V.getOperand(1).getValueType();
24666 if (!NVT.bitsEq(SmallVT))
24667 return SDValue();
24668
24669 // Combine:
24670 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24671 // Into:
24672 // indices are equal or bit offsets are equal => V2
24673 // otherwise => (extract_subvec V1, ExtIdx)
24674 uint64_t InsIdx = V.getConstantOperandVal(2);
24675 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24676 ExtIdx * NVT.getScalarSizeInBits()) {
24677 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24678 return SDValue();
24679
24680 return DAG.getBitcast(NVT, V.getOperand(1));
24681 }
24682 return DAG.getNode(
24683 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24684 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24685 N->getOperand(1));
24686 }
24687
24688 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24689 return NarrowBOp;
24690
24691 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24692 return SDValue(N, 0);
24693
24694 return SDValue();
24695}
24696
24697/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24698/// followed by concatenation. Narrow vector ops may have better performance
24699/// than wide ops, and this can unlock further narrowing of other vector ops.
24700/// Targets can invert this transform later if it is not profitable.
24701 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24702 SelectionDAG &DAG) {
24703 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24704 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24705 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24706 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24707 return SDValue();
24708
24709 // Split the wide shuffle mask into halves. Any mask element that is accessing
24710 // operand 1 is offset down to account for narrowing of the vectors.
24711 ArrayRef<int> Mask = Shuf->getMask();
24712 EVT VT = Shuf->getValueType(0);
24713 unsigned NumElts = VT.getVectorNumElements();
24714 unsigned HalfNumElts = NumElts / 2;
24715 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24716 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24717 for (unsigned i = 0; i != NumElts; ++i) {
24718 if (Mask[i] == -1)
24719 continue;
24720 // If we reference the upper (undef) subvector then the element is undef.
24721 if ((Mask[i] % NumElts) >= HalfNumElts)
24722 continue;
24723 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24724 if (i < HalfNumElts)
24725 Mask0[i] = M;
24726 else
24727 Mask1[i - HalfNumElts] = M;
24728 }
24729
24730 // Ask the target if this is a valid transform.
24731 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24732 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24733 HalfNumElts);
24734 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24735 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24736 return SDValue();
24737
24738 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24739 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
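// For illustration (X and Y are arbitrary v2i32 values):
//   v4i32 shuffle (concat X, undef), (concat Y, undef), <0,4,1,5>
//     --> concat (shuffle X, Y, <0,2>), (shuffle X, Y, <1,3>)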
24740 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24741 SDLoc DL(Shuf);
24742 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24743 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24744 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24745}
24746
24747// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24748// or turn a shuffle of a single concat into simpler shuffle then concat.
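// For illustration (A, B, C and D are arbitrary v4i32 values):
//   v8i32 shuffle (concat A, B), (concat C, D), <4,5,6,7,12,13,14,15>
//     --> v8i32 concat B, D
// because each v4i32-sized chunk of the mask copies exactly one concat operand.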
24749 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24750 EVT VT = N->getValueType(0);
24751 unsigned NumElts = VT.getVectorNumElements();
24752
24753 SDValue N0 = N->getOperand(0);
24754 SDValue N1 = N->getOperand(1);
24755 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24756 ArrayRef<int> Mask = SVN->getMask();
24757
24758 SmallVector<SDValue, 4> Ops;
24759 EVT ConcatVT = N0.getOperand(0).getValueType();
24760 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24761 unsigned NumConcats = NumElts / NumElemsPerConcat;
24762
24763 auto IsUndefMaskElt = [](int i) { return i == -1; };
24764
24765 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24766 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24767 // half vector elements.
24768 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24769 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24770 IsUndefMaskElt)) {
24771 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24772 N0.getOperand(1),
24773 Mask.slice(0, NumElemsPerConcat));
24774 N1 = DAG.getUNDEF(ConcatVT);
24775 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24776 }
24777
24778 // Look at every vector that's inserted. We're looking for exact
24779 // subvector-sized copies from a concatenated vector
24780 for (unsigned I = 0; I != NumConcats; ++I) {
24781 unsigned Begin = I * NumElemsPerConcat;
24782 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24783
24784 // Make sure we're dealing with a copy.
24785 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24786 Ops.push_back(DAG.getUNDEF(ConcatVT));
24787 continue;
24788 }
24789
24790 int OpIdx = -1;
24791 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24792 if (IsUndefMaskElt(SubMask[i]))
24793 continue;
24794 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24795 return SDValue();
24796 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24797 if (0 <= OpIdx && EltOpIdx != OpIdx)
24798 return SDValue();
24799 OpIdx = EltOpIdx;
24800 }
24801 assert(0 <= OpIdx && "Unknown concat_vectors op");
24802
24803 if (OpIdx < (int)N0.getNumOperands())
24804 Ops.push_back(N0.getOperand(OpIdx));
24805 else
24806 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24807 }
24808
24809 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24810}
24811
24812// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24813// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24814//
24815// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24816// a simplification in some sense, but it isn't appropriate in general: some
24817// BUILD_VECTORs are substantially cheaper than others. The general case
24818// of a BUILD_VECTOR requires inserting each element individually (or
24819// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24820// all constants is a single constant pool load. A BUILD_VECTOR where each
24821// element is identical is a splat. A BUILD_VECTOR where most of the operands
24822// are undef lowers to a small number of element insertions.
24823//
24824// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24825// We don't fold shuffles where one side is a non-zero constant, and we don't
24826// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24827// non-constant operands. This seems to work out reasonably well in practice.
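// For illustration (a..h are arbitrary scalars of the same type):
//   shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,4,1,5>
//     --> build_vector a,e,b,f
// No non-constant operand is duplicated, so the heuristics above allow the fold.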
24828 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24829 SelectionDAG &DAG,
24830 const TargetLowering &TLI) {
24831 EVT VT = SVN->getValueType(0);
24832 unsigned NumElts = VT.getVectorNumElements();
24833 SDValue N0 = SVN->getOperand(0);
24834 SDValue N1 = SVN->getOperand(1);
24835
24836 if (!N0->hasOneUse())
24837 return SDValue();
24838
24839 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
24840 // discussed above.
24841 if (!N1.isUndef()) {
24842 if (!N1->hasOneUse())
24843 return SDValue();
24844
24845 bool N0AnyConst = isAnyConstantBuildVector(N0);
24846 bool N1AnyConst = isAnyConstantBuildVector(N1);
24847 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24848 return SDValue();
24849 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24850 return SDValue();
24851 }
24852
24853 // If both inputs are splats of the same value then we can safely merge this
24854 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24855 bool IsSplat = false;
24856 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24857 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24858 if (BV0 && BV1)
24859 if (SDValue Splat0 = BV0->getSplatValue())
24860 IsSplat = (Splat0 == BV1->getSplatValue());
24861
24862 SmallVector<SDValue, 8> Ops;
24863 SmallSet<SDValue, 16> DuplicateOps;
24864 for (int M : SVN->getMask()) {
24865 SDValue Op = DAG.getUNDEF(VT.getScalarType());
24866 if (M >= 0) {
24867 int Idx = M < (int)NumElts ? M : M - NumElts;
24868 SDValue &S = (M < (int)NumElts ? N0 : N1);
24869 if (S.getOpcode() == ISD::BUILD_VECTOR) {
24870 Op = S.getOperand(Idx);
24871 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24872 SDValue Op0 = S.getOperand(0);
24873 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24874 } else {
24875 // Operand can't be combined - bail out.
24876 return SDValue();
24877 }
24878 }
24879
24880 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24881 // generating a splat; semantically, this is fine, but it's likely to
24882 // generate low-quality code if the target can't reconstruct an appropriate
24883 // shuffle.
24884 if (!Op.isUndef() && !isIntOrFPConstant(Op))
24885 if (!IsSplat && !DuplicateOps.insert(Op).second)
24886 return SDValue();
24887
24888 Ops.push_back(Op);
24889 }
24890
24891 // BUILD_VECTOR requires all inputs to be of the same type, find the
24892 // maximum type and extend them all.
24893 EVT SVT = VT.getScalarType();
24894 if (SVT.isInteger())
24895 for (SDValue &Op : Ops)
24896 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24897 if (SVT != VT.getScalarType())
24898 for (SDValue &Op : Ops)
24899 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24900 : (TLI.isZExtFree(Op.getValueType(), SVT)
24901 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24902 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24903 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24904}
24905
24906// Match shuffles that can be converted to *_vector_extend_in_reg.
24907// This is often generated during legalization.
24908// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24909// and returns the EVT to which the extension should be performed.
24910// NOTE: this assumes that the src is the first operand of the shuffle.
24911 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24912 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24913 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24914 bool LegalOperations) {
24915 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24916
24917 // TODO Add support for big-endian when we have a test case.
24918 if (!VT.isInteger() || IsBigEndian)
24919 return std::nullopt;
24920
24921 unsigned NumElts = VT.getVectorNumElements();
24922 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24923
24924 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
24925 // power-of-2 extensions as they are the most likely.
24926 // FIXME: should try Scale == NumElts case too,
24927 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24928 // The vector width must be a multiple of Scale.
24929 if (NumElts % Scale != 0)
24930 continue;
24931
24932 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24933 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24934
24935 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24936 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24937 continue;
24938
24939 if (Match(Scale))
24940 return OutVT;
24941 }
24942
24943 return std::nullopt;
24944}
24945
24946// Match shuffles that can be converted to any_vector_extend_in_reg.
24947// This is often generated during legalization.
24948// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24949 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24950 SelectionDAG &DAG,
24951 const TargetLowering &TLI,
24952 bool LegalOperations) {
24953 EVT VT = SVN->getValueType(0);
24954 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24955
24956 // TODO Add support for big-endian when we have a test case.
24957 if (!VT.isInteger() || IsBigEndian)
24958 return SDValue();
24959
24960 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
24961 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24962 Mask = SVN->getMask()](unsigned Scale) {
24963 for (unsigned i = 0; i != NumElts; ++i) {
24964 if (Mask[i] < 0)
24965 continue;
24966 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24967 continue;
24968 return false;
24969 }
24970 return true;
24971 };
24972
24973 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24974 SDValue N0 = SVN->getOperand(0);
24975 // Never create an illegal type. Only create unsupported operations if we
24976 // are pre-legalization.
24977 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24978 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24979 if (!OutVT)
24980 return SDValue();
24981 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24982}
24983
24984// Match shuffles that can be converted to zero_extend_vector_inreg.
24985// This is often generated during legalization.
24986// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
24987 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
24988 SelectionDAG &DAG,
24989 const TargetLowering &TLI,
24990 bool LegalOperations) {
24991 bool LegalTypes = true;
24992 EVT VT = SVN->getValueType(0);
24993 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
24994 unsigned NumElts = VT.getVectorNumElements();
24995 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24996
24997 // TODO: add support for big-endian when we have a test case.
24998 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24999 if (!VT.isInteger() || IsBigEndian)
25000 return SDValue();
25001
25002 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25003 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25004 for (int &Indice : Mask) {
25005 if (Indice < 0)
25006 continue;
25007 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25008 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25009 Fn(Indice, OpIdx, OpEltIdx);
25010 }
25011 };
25012
25013 // Which elements of which operand does this shuffle demand?
25014 std::array<APInt, 2> OpsDemandedElts;
25015 for (APInt &OpDemandedElts : OpsDemandedElts)
25016 OpDemandedElts = APInt::getZero(NumElts);
25017 ForEachDecomposedIndice(
25018 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25019 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25020 });
25021
25022 // Element-wise(!), which of these demanded elements are known to be zero?
25023 std::array<APInt, 2> OpsKnownZeroElts;
25024 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25025 std::get<2>(I) =
25026 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25027
25028 // Manifest zeroable element knowledge in the shuffle mask.
25029 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25030 // this is a local invention, but it won't leak into DAG.
25031 // FIXME: should we not manifest them, but just check when matching?
25032 bool HadZeroableElts = false;
25033 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25034 int &Indice, int OpIdx, int OpEltIdx) {
25035 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25036 Indice = -2; // Zeroable element.
25037 HadZeroableElts = true;
25038 }
25039 });
25040
25041 // Don't proceed unless we've refined at least one zeroable mask indice.
25042 // If we didn't, then we are still trying to match the same shuffle mask
25043 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25044 // and evidently failed. Proceeding will lead to endless combine loops.
25045 if (!HadZeroableElts)
25046 return SDValue();
25047
25048 // The shuffle may be more fine-grained than we want. Widen elements first.
25049 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25050 SmallVector<int, 16> ScaledMask;
25051 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25052 assert(Mask.size() >= ScaledMask.size() &&
25053 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25054 int Prescale = Mask.size() / ScaledMask.size();
25055
25056 NumElts = ScaledMask.size();
25057 EltSizeInBits *= Prescale;
25058
25059 EVT PrescaledVT = EVT::getVectorVT(
25060 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25061 NumElts);
25062
25063 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25064 return SDValue();
25065
25066 // For example,
25067 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25068 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25069 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25070 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25071 "Unexpected mask scaling factor.");
25072 ArrayRef<int> Mask = ScaledMask;
25073 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25074 SrcElt != NumSrcElts; ++SrcElt) {
25075 // Analyze the shuffle mask in Scale-sized chunks.
25076 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25077 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25078 Mask = Mask.drop_front(MaskChunk.size());
25079 // The first indice in this chunk must be SrcElt, but not zero!
25080 // FIXME: undef should be fine, but that results in more-defined result.
25081 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25082 return false;
25083 // The rest of the indices in this chunk must be zeros.
25084 // FIXME: undef should be fine, but that results in more-defined result.
25085 if (!all_of(MaskChunk.drop_front(1),
25086 [](int Indice) { return Indice == -2; }))
25087 return false;
25088 }
25089 assert(Mask.empty() && "Did not process the whole mask?");
25090 return true;
25091 };
25092
25093 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25094 for (bool Commuted : {false, true}) {
25095 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25096 if (Commuted)
25097 ShuffleVectorSDNode::commuteMask(ScaledMask);
25098 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25099 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25100 LegalOperations);
25101 if (OutVT)
25102 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25103 DAG.getBitcast(PrescaledVT, Op)));
25104 }
25105 return SDValue();
25106}
25107
25108// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25109// each source element of a large type into the lowest elements of a smaller
25110// destination type. This is often generated during legalization.
25111// If the source node itself was a '*_extend_vector_inreg' node then we should
25112// then be able to remove it.
25113 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25114 SelectionDAG &DAG) {
25115 EVT VT = SVN->getValueType(0);
25116 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25117
25118 // TODO Add support for big-endian when we have a test case.
25119 if (!VT.isInteger() || IsBigEndian)
25120 return SDValue();
25121
25122 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25123
25124 unsigned Opcode = N0.getOpcode();
25125 if (!ISD::isExtVecInRegOpcode(Opcode))
25126 return SDValue();
25127
25128 SDValue N00 = N0.getOperand(0);
25129 ArrayRef<int> Mask = SVN->getMask();
25130 unsigned NumElts = VT.getVectorNumElements();
25131 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25132 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25133 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25134
25135 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25136 return SDValue();
25137 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25138
25139 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25140 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25141 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25142 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25143 for (unsigned i = 0; i != NumElts; ++i) {
25144 if (Mask[i] < 0)
25145 continue;
25146 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25147 continue;
25148 return false;
25149 }
25150 return true;
25151 };
25152
25153 // At the moment we just handle the case where we've truncated back to the
25154 // same size as before the extension.
25155 // TODO: handle more extension/truncation cases as cases arise.
25156 if (EltSizeInBits != ExtSrcSizeInBits)
25157 return SDValue();
25158
25159 // We can remove *extend_vector_inreg only if the truncation happens at
25160 // the same scale as the extension.
25161 if (isTruncate(ExtScale))
25162 return DAG.getBitcast(VT, N00);
25163
25164 return SDValue();
25165}
25166
25167// Combine shuffles of splat-shuffles of the form:
25168// shuffle (shuffle V, undef, splat-mask), undef, M
25169// If splat-mask contains undef elements, we need to be careful about
25170 // introducing undefs in the folded mask which are not the result of composing
25171// the masks of the shuffles.
25172 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25173 SelectionDAG &DAG) {
25174 EVT VT = Shuf->getValueType(0);
25175 unsigned NumElts = VT.getVectorNumElements();
25176
25177 if (!Shuf->getOperand(1).isUndef())
25178 return SDValue();
25179
25180 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25181 // in disguise, with all demanded elements being identical.
25182 // FIXME: this can be done per-operand.
25183 if (!Shuf->isSplat()) {
25184 APInt DemandedElts(NumElts, 0);
25185 for (int Idx : Shuf->getMask()) {
25186 if (Idx < 0)
25187 continue; // Ignore sentinel indices.
25188 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25189 DemandedElts.setBit(Idx);
25190 }
25191 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25192 APInt UndefElts;
25193 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25194 // Even if all demanded elements are splat, some of them could be undef.
25195 // Which lowest demanded element is *not* known-undef?
25196 std::optional<unsigned> MinNonUndefIdx;
25197 for (int Idx : Shuf->getMask()) {
25198 if (Idx < 0 || UndefElts[Idx])
25199 continue; // Ignore sentinel indices, and undef elements.
25200 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25201 }
25202 if (!MinNonUndefIdx)
25203 return DAG.getUNDEF(VT); // All undef - result is undef.
25204 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25205 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25206 Shuf->getMask().end());
25207 for (int &Idx : SplatMask) {
25208 if (Idx < 0)
25209 continue; // Passthrough sentinel indices.
25210 // Otherwise, just pick the lowest demanded non-undef element.
25211 // Or sentinel undef, if we know we'd pick a known-undef element.
25212 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25213 }
25214 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25215 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25216 Shuf->getOperand(1), SplatMask);
25217 }
25218 }
25219
25220 // If the inner operand is a known splat with no undefs, just return that directly.
25221 // TODO: Create DemandedElts mask from Shuf's mask.
25222 // TODO: Allow undef elements and merge with the shuffle code below.
25223 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25224 return Shuf->getOperand(0);
25225
25226 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25227 if (!Splat || !Splat->isSplat())
25228 return SDValue();
25229
25230 ArrayRef<int> ShufMask = Shuf->getMask();
25231 ArrayRef<int> SplatMask = Splat->getMask();
25232 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25233
25234 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25235 // every undef mask element in the splat-shuffle has a corresponding undef
25236 // element in the user-shuffle's mask or if the composition of mask elements
25237 // would result in undef.
25238 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25239 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25240 // In this case it is not legal to simplify to the splat-shuffle because we
25241 // may be exposing to the users of the shuffle an undef element at index 1
25242 // which was not there before the combine.
25243 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25244 // In this case the composition of masks yields SplatMask, so it's ok to
25245 // simplify to the splat-shuffle.
25246 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25247 // In this case the composed mask includes all undef elements of SplatMask
25248 // and in addition sets element zero to undef. It is safe to simplify to
25249 // the splat-shuffle.
25250 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25251 ArrayRef<int> SplatMask) {
25252 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25253 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25254 SplatMask[UserMask[i]] != -1)
25255 return false;
25256 return true;
25257 };
25258 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25259 return Shuf->getOperand(0);
25260
25261 // Create a new shuffle with a mask that is composed of the two shuffles'
25262 // masks.
25263 SmallVector<int, 32> NewMask;
25264 for (int Idx : ShufMask)
25265 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25266
25267 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25268 Splat->getOperand(0), Splat->getOperand(1),
25269 NewMask);
25270}
25271
25272// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25273// the mask can be treated as a larger type.
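// For illustration (X and Y are arbitrary v4i32 values):
//   v8i16 shuffle (bitcast X), (bitcast Y), <4,5,6,7,8,9,10,11>
//     --> bitcast (v4i32 shuffle X, Y, <2,3,4,5>)
// since each pair of i16 lanes maps onto one whole i32 lane.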
25274 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25275 SelectionDAG &DAG,
25276 const TargetLowering &TLI,
25277 bool LegalOperations) {
25278 SDValue Op0 = SVN->getOperand(0);
25279 SDValue Op1 = SVN->getOperand(1);
25280 EVT VT = SVN->getValueType(0);
25281 if (Op0.getOpcode() != ISD::BITCAST)
25282 return SDValue();
25283 EVT InVT = Op0.getOperand(0).getValueType();
25284 if (!InVT.isVector() ||
25285 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25286 Op1.getOperand(0).getValueType() != InVT)))
25287 return SDValue();
25288 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25289 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25290 return SDValue();
25291
25292 int VTLanes = VT.getVectorNumElements();
25293 int InLanes = InVT.getVectorNumElements();
25294 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25295 (LegalOperations &&
25296 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25297 return SDValue();
25298 int Factor = VTLanes / InLanes;
25299
25300 // Check that each group of lanes in the mask is either undef or makes a valid
25301 // mask for the wider lane type.
25302 ArrayRef<int> Mask = SVN->getMask();
25303 SmallVector<int> NewMask;
25304 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25305 return SDValue();
25306
25307 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25308 return SDValue();
25309
25310 // Create the new shuffle with the new mask and bitcast it back to the
25311 // original type.
25312 SDLoc DL(SVN);
25313 Op0 = Op0.getOperand(0);
25314 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25315 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25316 return DAG.getBitcast(VT, NewShuf);
25317}
25318
25319/// Combine shuffle of shuffle of the form:
25320/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
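/// For illustration (X is an arbitrary v4i32 value):
///   shuf (shuf X, undef, <1,1,3,3>), undef, <0,u,0,u>
/// reads element 1 of X in every demanded lane, so it can become
///   shuf X, undef, <1,u,1,u>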
25321 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25322 SelectionDAG &DAG) {
25323 if (!OuterShuf->getOperand(1).isUndef())
25324 return SDValue();
25325 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25326 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25327 return SDValue();
25328
25329 ArrayRef<int> OuterMask = OuterShuf->getMask();
25330 ArrayRef<int> InnerMask = InnerShuf->getMask();
25331 unsigned NumElts = OuterMask.size();
25332 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25333 SmallVector<int, 32> CombinedMask(NumElts, -1);
25334 int SplatIndex = -1;
25335 for (unsigned i = 0; i != NumElts; ++i) {
25336 // Undef lanes remain undef.
25337 int OuterMaskElt = OuterMask[i];
25338 if (OuterMaskElt == -1)
25339 continue;
25340
25341 // Peek through the shuffle masks to get the underlying source element.
25342 int InnerMaskElt = InnerMask[OuterMaskElt];
25343 if (InnerMaskElt == -1)
25344 continue;
25345
25346 // Initialize the splatted element.
25347 if (SplatIndex == -1)
25348 SplatIndex = InnerMaskElt;
25349
25350 // Non-matching index - this is not a splat.
25351 if (SplatIndex != InnerMaskElt)
25352 return SDValue();
25353
25354 CombinedMask[i] = InnerMaskElt;
25355 }
25356 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25357 getSplatIndex(CombinedMask) != -1) &&
25358 "Expected a splat mask");
25359
25360 // TODO: The transform may be a win even if the mask is not legal.
25361 EVT VT = OuterShuf->getValueType(0);
25362 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25363 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25364 return SDValue();
25365
25366 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25367 InnerShuf->getOperand(1), CombinedMask);
25368}
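// Editorial worked example (X is a placeholder vector): with
//   InnerMask = [2,u,1,0] and OuterMask = [3,3,u,3],
// every defined outer lane peeks through to inner element 0, so the combined
// mask is [0,0,u,0] and the pair folds to a single splat shuffle of X.
// If OuterMask were [3,0,u,3], lane 1 would peek through to element 2 and the
// splat check would fail.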
25369
25370/// If the shuffle mask is taking exactly one element from the first vector
25371/// operand and passing through all other elements from the second vector
25372/// operand, return the index of the mask element that is choosing an element
25373/// from the first operand. Otherwise, return -1.
25374static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25375 int MaskSize = Mask.size();
25376 int EltFromOp0 = -1;
25377 // TODO: This does not match if there are undef elements in the shuffle mask.
25378 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25379 // removing an instruction (a shuffle), but losing the knowledge that some
25380 // vector lanes are not needed.
25381 for (int i = 0; i != MaskSize; ++i) {
25382 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25383 // We're looking for a shuffle of exactly one element from operand 0.
25384 if (EltFromOp0 != -1)
25385 return -1;
25386 EltFromOp0 = i;
25387 } else if (Mask[i] != i + MaskSize) {
25388 // Nothing from operand 1 can change lanes.
25389 return -1;
25390 }
25391 }
25392 return EltFromOp0;
25393}
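// Editorial example: for a 4-element shuffle, mask <4,5,2,7> returns 2 (only
// lane 2 reads operand 0; lanes 0, 1 and 3 keep operand-1 elements 4, 5 and 7
// in place). Masks such as <4,0,2,7> (two picks from operand 0) or <5,4,2,7>
// (operand-1 lanes change position) return -1.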
25394
25395/// If a shuffle inserts exactly one element from a source vector operand into
25396/// another vector operand and we can access the specified element as a scalar,
25397/// then we can eliminate the shuffle.
25398static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25399 SelectionDAG &DAG) {
25400 // First, check if we are taking one element of a vector and shuffling that
25401 // element into another vector.
25402 ArrayRef<int> Mask = Shuf->getMask();
25403 SmallVector<int, 16> CommutedMask(Mask);
25404 SDValue Op0 = Shuf->getOperand(0);
25405 SDValue Op1 = Shuf->getOperand(1);
25406 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25407 if (ShufOp0Index == -1) {
25408 // Commute mask and check again.
25409 ShuffleVectorSDNode::commuteMask(CommutedMask);
25410 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25411 if (ShufOp0Index == -1)
25412 return SDValue();
25413 // Commute operands to match the commuted shuffle mask.
25414 std::swap(Op0, Op1);
25415 Mask = CommutedMask;
25416 }
25417
25418 // The shuffle inserts exactly one element from operand 0 into operand 1.
25419 // Now see if we can access that element as a scalar via a real insert element
25420 // instruction.
25421 // TODO: We can try harder to locate the element as a scalar. Examples: it
25422 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25423 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25424 "Shuffle mask value must be from operand 0");
25425 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25426 return SDValue();
25427
25428 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25429 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25430 return SDValue();
25431
25432 // There's an existing insertelement with constant insertion index, so we
25433 // don't need to check the legality/profitability of a replacement operation
25434 // that differs at most in the constant value. The target should be able to
25435 // lower any of those in a similar way. If not, legalization will expand this
25436 // to a scalar-to-vector plus shuffle.
25437 //
25438 // Note that the shuffle may move the scalar from the position that the insert
25439 // element used. Therefore, our new insert element occurs at the shuffle's
25440 // mask index value, not the insert's index value.
25441 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25442 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25443 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25444 Op1, Op0.getOperand(1), NewInsIndex);
25445}
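// Editorial example (x, v1, v2 are placeholders):
//   shuffle (insertelt v1, x, 1), v2, <4,5,1,7>
// takes only element 1 of operand 0 (the element written by the insert) and
// places it in lane 2, so it is rewritten as
//   insertelt v2, x, 2
// using the shuffle's lane index rather than the original insert index.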
25446
25447/// If we have a unary shuffle of a shuffle, see if it can be folded away
25448/// completely. This has the potential to lose undef knowledge because the first
25449/// shuffle may not have an undef mask element where the second one does. So
25450/// only call this after doing simplifications based on demanded elements.
25451static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25452 // shuf (shuf0 X, Y, Mask0), undef, Mask
25453 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25454 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25455 return SDValue();
25456
25457 ArrayRef<int> Mask = Shuf->getMask();
25458 ArrayRef<int> Mask0 = Shuf0->getMask();
25459 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25460 // Ignore undef elements.
25461 if (Mask[i] == -1)
25462 continue;
25463 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25464
25465 // Is the element of the shuffle operand chosen by this shuffle the same as
25466 // the element chosen by the shuffle operand itself?
25467 if (Mask0[Mask[i]] != Mask0[i])
25468 return SDValue();
25469 }
25470 // Every element of this shuffle is identical to the result of the previous
25471 // shuffle, so we can replace this value.
25472 return Shuf->getOperand(0);
25473}
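// Editorial example: for shuf (shuf0 X, Y, [2,2,0,1]), undef, [1,0,2,3], every
// outer lane selects an inner lane that holds the same source element
// (Mask0[1] == Mask0[0] == 2), so the outer shuffle is redundant and the inner
// shuffle is returned directly. An outer mask of [2,0,2,3] would not fold,
// since Mask0[2] == 0 differs from Mask0[0] == 2.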
25474
25475SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25476 EVT VT = N->getValueType(0);
25477 unsigned NumElts = VT.getVectorNumElements();
25478
25479 SDValue N0 = N->getOperand(0);
25480 SDValue N1 = N->getOperand(1);
25481
25482 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25483
25484 // Canonicalize shuffle undef, undef -> undef
25485 if (N0.isUndef() && N1.isUndef())
25486 return DAG.getUNDEF(VT);
25487
25488 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25489
25490 // Canonicalize shuffle v, v -> v, undef
25491 if (N0 == N1)
25492 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25493 createUnaryMask(SVN->getMask(), NumElts));
25494
25495 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25496 if (N0.isUndef())
25497 return DAG.getCommutedVectorShuffle(*SVN);
25498
25499 // Remove references to rhs if it is undef
25500 if (N1.isUndef()) {
25501 bool Changed = false;
25502 SmallVector<int, 8> NewMask;
25503 for (unsigned i = 0; i != NumElts; ++i) {
25504 int Idx = SVN->getMaskElt(i);
25505 if (Idx >= (int)NumElts) {
25506 Idx = -1;
25507 Changed = true;
25508 }
25509 NewMask.push_back(Idx);
25510 }
25511 if (Changed)
25512 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25513 }
25514
25515 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25516 return InsElt;
25517
25518 // A shuffle of a single vector that is a splatted value can always be folded.
25519 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25520 return V;
25521
25522 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25523 return V;
25524
25525 // If it is a splat, check if the argument vector is another splat or a
25526 // build_vector.
25527 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25528 int SplatIndex = SVN->getSplatIndex();
25529 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25530 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25531 // splat (vector_bo L, R), Index -->
25532 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25533 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25534 SDLoc DL(N);
25535 EVT EltVT = VT.getScalarType();
25536 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25537 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25538 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25539 SDValue NewBO =
25540 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25541 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25542 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25543 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25544 }
25545
25546 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25547 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25548 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25549 N0.hasOneUse()) {
25550 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25551 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25552
25553 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25554 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25555 if (Idx->getAPIntValue() == SplatIndex)
25556 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25557
25558 // Look through a bitcast if LE and splatting lane 0, through to a
25559 // scalar_to_vector or a build_vector.
25560 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25561 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25562 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25563 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25564 EVT N00VT = N0.getOperand(0).getValueType();
25565 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25566 VT.isInteger() && N00VT.isInteger()) {
25567 EVT InVT =
25568 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25569 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25570 SDLoc(N), InVT);
25571 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25572 }
25573 }
25574 }
25575
25576 // If this is a bit convert that changes the element type of the vector but
25577 // not the number of vector elements, look through it. Be careful not to
25579// look through conversions that change things like v4f32 to v2f64.
25579 SDNode *V = N0.getNode();
25580 if (V->getOpcode() == ISD::BITCAST) {
25581 SDValue ConvInput = V->getOperand(0);
25582 if (ConvInput.getValueType().isVector() &&
25583 ConvInput.getValueType().getVectorNumElements() == NumElts)
25584 V = ConvInput.getNode();
25585 }
25586
25587 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25588 assert(V->getNumOperands() == NumElts &&
25589 "BUILD_VECTOR has wrong number of operands");
25590 SDValue Base;
25591 bool AllSame = true;
25592 for (unsigned i = 0; i != NumElts; ++i) {
25593 if (!V->getOperand(i).isUndef()) {
25594 Base = V->getOperand(i);
25595 break;
25596 }
25597 }
25598 // Splat of <u, u, u, u>, return <u, u, u, u>
25599 if (!Base.getNode())
25600 return N0;
25601 for (unsigned i = 0; i != NumElts; ++i) {
25602 if (V->getOperand(i) != Base) {
25603 AllSame = false;
25604 break;
25605 }
25606 }
25607 // Splat of <x, x, x, x>, return <x, x, x, x>
25608 if (AllSame)
25609 return N0;
25610
25611 // Canonicalize any other splat as a build_vector.
25612 SDValue Splatted = V->getOperand(SplatIndex);
25613 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25614 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25615
25616 // We may have jumped through bitcasts, so the type of the
25617 // BUILD_VECTOR may not match the type of the shuffle.
25618 if (V->getValueType(0) != VT)
25619 NewBV = DAG.getBitcast(VT, NewBV);
25620 return NewBV;
25621 }
25622 }
25623
25624 // Simplify source operands based on shuffle mask.
25625 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25626 return SDValue(N, 0);
25627
25628 // This is intentionally placed after demanded elements simplification because
25629 // it could eliminate knowledge of undef elements created by this shuffle.
25630 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25631 return ShufOp;
25632
25633 // Match shuffles that can be converted to any_vector_extend_in_reg.
25634 if (SDValue V =
25635 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25636 return V;
25637
25638 // Combine "truncate_vector_in_reg" style shuffles.
25639 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25640 return V;
25641
25642 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25643 Level < AfterLegalizeVectorOps &&
25644 (N1.isUndef() ||
25645 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25646 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25647 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25648 return V;
25649 }
25650
25651 // A shuffle of a concat of the same narrow vector can be reduced to use
25652 // only low-half elements of a concat with undef:
25653 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
25654 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25655 N0.getNumOperands() == 2 &&
25656 N0.getOperand(0) == N0.getOperand(1)) {
25657 int HalfNumElts = (int)NumElts / 2;
25658 SmallVector<int, 8> NewMask;
25659 for (unsigned i = 0; i != NumElts; ++i) {
25660 int Idx = SVN->getMaskElt(i);
25661 if (Idx >= HalfNumElts) {
25662 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25663 Idx -= HalfNumElts;
25664 }
25665 NewMask.push_back(Idx);
25666 }
25667 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25668 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25669 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25670 N0.getOperand(0), UndefVec);
25671 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25672 }
25673 }
25674
25675 // See if we can replace a shuffle with an insert_subvector.
25676 // e.g. v2i32 into v8i32:
25677 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25678 // --> insert_subvector(lhs,rhs1,4).
25679 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25680 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25681 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25682 // Ensure RHS subvectors are legal.
25683 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25684 EVT SubVT = RHS.getOperand(0).getValueType();
25685 int NumSubVecs = RHS.getNumOperands();
25686 int NumSubElts = SubVT.getVectorNumElements();
25687 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25688 if (!TLI.isTypeLegal(SubVT))
25689 return SDValue();
25690
25691 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25692 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25693 return SDValue();
25694
25695 // Search [NumSubElts] spans for RHS sequence.
25696 // TODO: Can we avoid nested loops to increase performance?
25697 SmallVector<int> InsertionMask(NumElts);
25698 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25699 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25700 // Reset mask to identity.
25701 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25702
25703 // Add subvector insertion.
25704 std::iota(InsertionMask.begin() + SubIdx,
25705 InsertionMask.begin() + SubIdx + NumSubElts,
25706 NumElts + (SubVec * NumSubElts));
25707
25708 // See if the shuffle mask matches the reference insertion mask.
25709 bool MatchingShuffle = true;
25710 for (int i = 0; i != (int)NumElts; ++i) {
25711 int ExpectIdx = InsertionMask[i];
25712 int ActualIdx = Mask[i];
25713 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25714 MatchingShuffle = false;
25715 break;
25716 }
25717 }
25718
25719 if (MatchingShuffle)
25720 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25721 RHS.getOperand(SubVec),
25722 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25723 }
25724 }
25725 return SDValue();
25726 };
25727 ArrayRef<int> Mask = SVN->getMask();
25728 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25729 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25730 return InsertN1;
25731 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25732 SmallVector<int> CommuteMask(Mask);
25733 ShuffleVectorSDNode::commuteMask(CommuteMask);
25734 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25735 return InsertN0;
25736 }
25737 }
25738
25739 // If we're not performing a select/blend shuffle, see if we can convert the
25740 // shuffle into an AND node, with all the out-of-lane elements known to be zero.
25741 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25742 bool IsInLaneMask = true;
25743 ArrayRef<int> Mask = SVN->getMask();
25744 SmallVector<int, 16> ClearMask(NumElts, -1);
25745 APInt DemandedLHS = APInt::getZero(NumElts);
25746 APInt DemandedRHS = APInt::getZero(NumElts);
25747 for (int I = 0; I != (int)NumElts; ++I) {
25748 int M = Mask[I];
25749 if (M < 0)
25750 continue;
25751 ClearMask[I] = M == I ? I : (I + NumElts);
25752 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25753 if (M != I) {
25754 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25755 Demanded.setBit(M % NumElts);
25756 }
25757 }
25758 // TODO: Should we try to mask with N1 as well?
25759 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25760 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25761 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25762 SDLoc DL(N);
25763 EVT IntVT = VT.changeVectorElementTypeToInteger();
25764 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25765 // Transform the type to a legal type so that the buildvector constant
25766 // elements are not illegal. Make sure that the result is larger than the
25767 // original type, in case the value is split into two (eg i64->i32).
25768 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25769 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25770 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25771 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25772 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25773 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25774 for (int I = 0; I != (int)NumElts; ++I)
25775 if (0 <= Mask[I])
25776 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25777
25778 // See if a clear mask is legal instead of going via
25779 // XformToShuffleWithZero which loses UNDEF mask elements.
25780 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25781 return DAG.getBitcast(
25782 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25783 DAG.getConstant(0, DL, IntVT), ClearMask));
25784
25785 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25786 return DAG.getBitcast(
25787 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25788 DAG.getBuildVector(IntVT, DL, AndMask)));
25789 }
25790 }
25791 }
25792
25793 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25794 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25795 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25796 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25797 return Res;
25798
25799 // If this shuffle only has a single input that is a bitcasted shuffle,
25800 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25801 // back to their original types.
25802 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25803 N1.isUndef() && Level < AfterLegalizeVectorOps &&
25804 TLI.isTypeLegal(VT)) {
25805
25806 SDValue BC0 = peekThroughOneUseBitcasts(N0);
25807 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25808 EVT SVT = VT.getScalarType();
25809 EVT InnerVT = BC0->getValueType(0);
25810 EVT InnerSVT = InnerVT.getScalarType();
25811
25812 // Determine which shuffle works with the smaller scalar type.
25813 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25814 EVT ScaleSVT = ScaleVT.getScalarType();
25815
25816 if (TLI.isTypeLegal(ScaleVT) &&
25817 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25818 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25819 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25820 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25821
25822 // Scale the shuffle masks to the smaller scalar type.
25823 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25824 SmallVector<int, 8> InnerMask;
25825 SmallVector<int, 8> OuterMask;
25826 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25827 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25828
25829 // Merge the shuffle masks.
25830 SmallVector<int, 8> NewMask;
25831 for (int M : OuterMask)
25832 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25833
25834 // Test for shuffle mask legality over both commutations.
25835 SDValue SV0 = BC0->getOperand(0);
25836 SDValue SV1 = BC0->getOperand(1);
25837 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25838 if (!LegalMask) {
25839 std::swap(SV0, SV1);
25840 ShuffleVectorSDNode::commuteMask(NewMask);
25841 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25842 }
25843
25844 if (LegalMask) {
25845 SV0 = DAG.getBitcast(ScaleVT, SV0);
25846 SV1 = DAG.getBitcast(ScaleVT, SV1);
25847 return DAG.getBitcast(
25848 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25849 }
25850 }
25851 }
25852 }
25853
25854 // Match shuffles of bitcasts, so long as the mask can be treated as the
25855 // larger type.
25856 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25857 return V;
25858
25859 // Compute the combined shuffle mask for a shuffle with SV0 as the first
25860 // operand, and SV1 as the second operand.
25861 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25862 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
25863 auto MergeInnerShuffle =
25864 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25865 ShuffleVectorSDNode *OtherSVN, SDValue N1,
25866 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25867 SmallVectorImpl<int> &Mask) -> bool {
25868 // Don't try to fold splats; they're likely to simplify somehow, or they
25869 // might be free.
25870 if (OtherSVN->isSplat())
25871 return false;
25872
25873 SV0 = SV1 = SDValue();
25874 Mask.clear();
25875
25876 for (unsigned i = 0; i != NumElts; ++i) {
25877 int Idx = SVN->getMaskElt(i);
25878 if (Idx < 0) {
25879 // Propagate Undef.
25880 Mask.push_back(Idx);
25881 continue;
25882 }
25883
25884 if (Commute)
25885 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25886
25887 SDValue CurrentVec;
25888 if (Idx < (int)NumElts) {
25889 // This shuffle index refers to the inner shuffle N0. Lookup the inner
25890 // shuffle mask to identify which vector is actually referenced.
25891 Idx = OtherSVN->getMaskElt(Idx);
25892 if (Idx < 0) {
25893 // Propagate Undef.
25894 Mask.push_back(Idx);
25895 continue;
25896 }
25897 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25898 : OtherSVN->getOperand(1);
25899 } else {
25900 // This shuffle index references an element within N1.
25901 CurrentVec = N1;
25902 }
25903
25904 // Simple case where 'CurrentVec' is UNDEF.
25905 if (CurrentVec.isUndef()) {
25906 Mask.push_back(-1);
25907 continue;
25908 }
25909
25910 // Canonicalize the shuffle index. We don't know yet if CurrentVec
25911 // will be the first or second operand of the combined shuffle.
25912 Idx = Idx % NumElts;
25913 if (!SV0.getNode() || SV0 == CurrentVec) {
25914 // Ok. CurrentVec is the left hand side.
25915 // Update the mask accordingly.
25916 SV0 = CurrentVec;
25917 Mask.push_back(Idx);
25918 continue;
25919 }
25920 if (!SV1.getNode() || SV1 == CurrentVec) {
25921 // Ok. CurrentVec is the right hand side.
25922 // Update the mask accordingly.
25923 SV1 = CurrentVec;
25924 Mask.push_back(Idx + NumElts);
25925 continue;
25926 }
25927
25928 // Last chance - see if the vector is another shuffle and if it
25929 // uses one of the existing candidate shuffle ops.
25930 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25931 int InnerIdx = CurrentSVN->getMaskElt(Idx);
25932 if (InnerIdx < 0) {
25933 Mask.push_back(-1);
25934 continue;
25935 }
25936 SDValue InnerVec = (InnerIdx < (int)NumElts)
25937 ? CurrentSVN->getOperand(0)
25938 : CurrentSVN->getOperand(1);
25939 if (InnerVec.isUndef()) {
25940 Mask.push_back(-1);
25941 continue;
25942 }
25943 InnerIdx %= NumElts;
25944 if (InnerVec == SV0) {
25945 Mask.push_back(InnerIdx);
25946 continue;
25947 }
25948 if (InnerVec == SV1) {
25949 Mask.push_back(InnerIdx + NumElts);
25950 continue;
25951 }
25952 }
25953
25954 // Bail out if we cannot convert the shuffle pair into a single shuffle.
25955 return false;
25956 }
25957
25958 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25959 return true;
25960
25961 // Avoid introducing shuffles with illegal mask.
25962 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25963 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25964 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25965 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25966 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25967 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25968 if (TLI.isShuffleMaskLegal(Mask, VT))
25969 return true;
25970
25971 std::swap(SV0, SV1);
25972 ShuffleVectorSDNode::commuteMask(Mask);
25973 return TLI.isShuffleMaskLegal(Mask, VT);
25974 };
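// Editorial example for MergeInnerShuffle (A, B are placeholder vectors):
// merging shuffle (shuffle A, B, <0,4,1,5>), B, <0,2,4,6> walks each outer
// lane through the inner mask, assigns SV0 = A and SV1 = B, and produces the
// single shuffle A, B, <0,1,4,6>. A third distinct source vector makes the
// lambda bail out (unless that vector is itself a shuffle of SV0/SV1).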
25975
25976 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25977 // Canonicalize shuffles according to rules:
25978 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25979 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25980 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25981 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25982 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25983 // The incoming shuffle must be of the same type as the result of the
25984 // current shuffle.
25985 assert(N1->getOperand(0).getValueType() == VT &&
25986 "Shuffle types don't match");
25987
25988 SDValue SV0 = N1->getOperand(0);
25989 SDValue SV1 = N1->getOperand(1);
25990 bool HasSameOp0 = N0 == SV0;
25991 bool IsSV1Undef = SV1.isUndef();
25992 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
25993 // Commute the operands of this shuffle so merging below will trigger.
25994 return DAG.getCommutedVectorShuffle(*SVN);
25995 }
25996
25997 // Canonicalize splat shuffles to the RHS to improve merging below.
25998 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
25999 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26000 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26001 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26002 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26003 return DAG.getCommutedVectorShuffle(*SVN);
26004 }
26005
26006 // Try to fold according to rules:
26007 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26008 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26009 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26010 // Don't try to fold shuffles with illegal type.
26011 // Only fold if this shuffle is the only user of the other shuffle.
26012 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26013 for (int i = 0; i != 2; ++i) {
26014 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26015 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26016 // The incoming shuffle must be of the same type as the result of the
26017 // current shuffle.
26018 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26019 assert(OtherSV->getOperand(0).getValueType() == VT &&
26020 "Shuffle types don't match");
26021
26022 SDValue SV0, SV1;
26023 SmallVector<int, 4> Mask;
26024 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26025 SV0, SV1, Mask)) {
26026 // Check if all indices in Mask are Undef. In case, propagate Undef.
26027 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26028 return DAG.getUNDEF(VT);
26029
26030 return DAG.getVectorShuffle(VT, SDLoc(N),
26031 SV0 ? SV0 : DAG.getUNDEF(VT),
26032 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26033 }
26034 }
26035 }
26036
26037 // Merge a shuffle through a binop if we are able to merge it with at least
26038 // one other shuffle.
26039 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26040 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26041 unsigned SrcOpcode = N0.getOpcode();
26042 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26043 (N1.isUndef() ||
26044 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26045 // Get binop source ops, or just pass on the undef.
26046 SDValue Op00 = N0.getOperand(0);
26047 SDValue Op01 = N0.getOperand(1);
26048 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26049 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26050 // TODO: We might be able to relax the VT check but we don't currently
26051 // have any isBinOp() that has different result/ops VTs so play safe until
26052 // we have test coverage.
26053 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26054 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26055 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26056 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26057 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26058 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26059 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26060 SmallVectorImpl<int> &Mask, bool LeftOp,
26061 bool Commute) {
26062 SDValue InnerN = Commute ? N1 : N0;
26063 SDValue Op0 = LeftOp ? Op00 : Op01;
26064 SDValue Op1 = LeftOp ? Op10 : Op11;
26065 if (Commute)
26066 std::swap(Op0, Op1);
26067 // Only accept the merged shuffle if we don't introduce undef elements,
26068 // or the inner shuffle already contained undef elements.
26069 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26070 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26071 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26072 Mask) &&
26073 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26074 llvm::none_of(Mask, [](int M) { return M < 0; }));
26075 };
26076
26077 // Ensure we don't increase the number of shuffles - we must merge a
26078 // shuffle from at least one of the LHS and RHS ops.
26079 bool MergedLeft = false;
26080 SDValue LeftSV0, LeftSV1;
26081 SmallVector<int, 4> LeftMask;
26082 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26083 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26084 MergedLeft = true;
26085 } else {
26086 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26087 LeftSV0 = Op00, LeftSV1 = Op10;
26088 }
26089
26090 bool MergedRight = false;
26091 SDValue RightSV0, RightSV1;
26092 SmallVector<int, 4> RightMask;
26093 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26094 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26095 MergedRight = true;
26096 } else {
26097 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26098 RightSV0 = Op01, RightSV1 = Op11;
26099 }
26100
26101 if (MergedLeft || MergedRight) {
26102 SDLoc DL(N);
26103 SDValue LHS = DAG.getVectorShuffle(
26104 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26105 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26106 SDValue RHS = DAG.getVectorShuffle(
26107 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26108 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26109 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26110 }
26111 }
26112 }
26113 }
26114
26115 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26116 return V;
26117
26118 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26119 // Perform this really late, because it could eliminate knowledge
26120 // of undef elements created by this shuffle.
26121 if (Level < AfterLegalizeTypes)
26122 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26123 LegalOperations))
26124 return V;
26125
26126 return SDValue();
26127}
26128
26129SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26130 EVT VT = N->getValueType(0);
26131 if (!VT.isFixedLengthVector())
26132 return SDValue();
26133
26134 // Try to convert a scalar binop with an extracted vector element to a vector
26135 // binop. This is intended to reduce potentially expensive register moves.
26136 // TODO: Check if both operands are extracted.
26137 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26138 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26139 SDValue Scalar = N->getOperand(0);
26140 unsigned Opcode = Scalar.getOpcode();
26141 EVT VecEltVT = VT.getScalarType();
26142 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26143 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26144 Scalar.getOperand(0).getValueType() == VecEltVT &&
26145 Scalar.getOperand(1).getValueType() == VecEltVT &&
26146 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26147 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26148 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26149 // Match an extract element and get a shuffle mask equivalent.
26150 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26151
26152 for (int i : {0, 1}) {
26153 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26154 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26155 SDValue EE = Scalar.getOperand(i);
26156 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26157 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26158 EE.getOperand(0).getValueType() == VT &&
26159 isa<ConstantSDNode>(EE.getOperand(1))) {
26160 // Mask = {ExtractIndex, undef, undef....}
26161 ShufMask[0] = EE.getConstantOperandVal(1);
26162 // Make sure the shuffle is legal if we are crossing lanes.
26163 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26164 SDLoc DL(N);
26165 SDValue V[] = {EE.getOperand(0),
26166 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26167 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26168 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26169 ShufMask);
26170 }
26171 }
26172 }
26173 }
26174
26175 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26176 // with a VECTOR_SHUFFLE and possible truncate.
26177 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26178 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26179 return SDValue();
26180
26181 // If we have an implicit truncate, truncate here if it is legal.
26182 if (VecEltVT != Scalar.getValueType() &&
26183 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26184 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26185 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26186 }
26187
26188 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26189 if (!ExtIndexC)
26190 return SDValue();
26191
26192 SDValue SrcVec = Scalar.getOperand(0);
26193 EVT SrcVT = SrcVec.getValueType();
26194 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26195 unsigned VTNumElts = VT.getVectorNumElements();
26196 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26197 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26198 SmallVector<int, 8> Mask(SrcNumElts, -1);
26199 Mask[0] = ExtIndexC->getZExtValue();
26200 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26201 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26202 if (!LegalShuffle)
26203 return SDValue();
26204
26205 // If the initial vector is the same size, the shuffle is the result.
26206 if (VT == SrcVT)
26207 return LegalShuffle;
26208
26209 // If not, shorten the shuffled vector.
26210 if (VTNumElts != SrcNumElts) {
26211 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26212 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26213 SrcVT.getVectorElementType(), VTNumElts);
26214 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26215 ZeroIdx);
26216 }
26217 }
26218
26219 return SDValue();
26220}
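// Editorial example (V is a placeholder v4i32 value):
//   v4i32 scalar_to_vector (extractelt V, 2)
// becomes shuffle V, undef, <2,u,u,u> when that mask can be built legally; if
// the result type were v2i32 instead, the same v4i32 shuffle is built and then
// narrowed with extract_subvector at index 0.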
26221
26222SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26223 EVT VT = N->getValueType(0);
26224 SDValue N0 = N->getOperand(0);
26225 SDValue N1 = N->getOperand(1);
26226 SDValue N2 = N->getOperand(2);
26227 uint64_t InsIdx = N->getConstantOperandVal(2);
26228
26229 // If inserting an UNDEF, just return the original vector.
26230 if (N1.isUndef())
26231 return N0;
26232
26233 // If this is an insert of an extracted vector into an undef vector, we can
26234 // just use the input to the extract if the types match, and can simplify
26235 // in some cases even if they don't.
26236 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26237 N1.getOperand(1) == N2) {
26238 EVT SrcVT = N1.getOperand(0).getValueType();
26239 if (SrcVT == VT)
26240 return N1.getOperand(0);
26241 // TODO: To remove the zero check, need to adjust the offset to
26242 // a multiple of the new src type.
26243 if (isNullConstant(N2) &&
26244 VT.isScalableVector() == SrcVT.isScalableVector()) {
26245 if (VT.getSizeInBits() > SrcVT.getSizeInBits())
26246 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26247 VT, N0, N1.getOperand(0), N2);
26248 else
26249 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26250 VT, N1.getOperand(0), N2);
26251 }
26252 }
26253
26254 // Handle case where we've ended up inserting back into the source vector
26255 // we extracted the subvector from.
26256 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26257 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26258 N1.getOperand(1) == N2)
26259 return N0;
26260
26261 // Simplify scalar inserts into an undef vector:
26262 // insert_subvector undef, (splat X), N2 -> splat X
26263 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26264 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26265 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26266
26267 // If we are inserting a bitcast value into an undef, with the same
26268 // number of elements, just use the bitcast input of the extract.
26269 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26270 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26271 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26272 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26273 N1.getOperand(0).getOperand(1) == N2 &&
26274 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26275 VT.getVectorElementCount() &&
26276 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26277 VT.getSizeInBits()) {
26278 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26279 }
26280
26281 // If both N0 and N1 are bitcast values on which insert_subvector
26282 // would make sense, pull the bitcast through.
26283 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26284 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26285 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26286 SDValue CN0 = N0.getOperand(0);
26287 SDValue CN1 = N1.getOperand(0);
26288 EVT CN0VT = CN0.getValueType();
26289 EVT CN1VT = CN1.getValueType();
26290 if (CN0VT.isVector() && CN1VT.isVector() &&
26291 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26292 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26293 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26294 CN0.getValueType(), CN0, CN1, N2);
26295 return DAG.getBitcast(VT, NewINSERT);
26296 }
26297 }
26298
26299 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26300 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26301 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26302 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26303 N0.getOperand(1).getValueType() == N1.getValueType() &&
26304 N0.getOperand(2) == N2)
26305 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26306 N1, N2);
26307
26308 // Eliminate an intermediate insert into an undef vector:
26309 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26310 // insert_subvector undef, X, 0
26311 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26312 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26313 isNullConstant(N2))
26314 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26315 N1.getOperand(1), N2);
26316
26317 // Push subvector bitcasts to the output, adjusting the index as we go.
26318 // insert_subvector(bitcast(v), bitcast(s), c1)
26319 // -> bitcast(insert_subvector(v, s, c2))
26320 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26321 N1.getOpcode() == ISD::BITCAST) {
26322 SDValue N0Src = peekThroughBitcasts(N0);
26323 SDValue N1Src = peekThroughBitcasts(N1);
26324 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26325 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26326 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26327 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26328 EVT NewVT;
26329 SDLoc DL(N);
26330 SDValue NewIdx;
26331 LLVMContext &Ctx = *DAG.getContext();
26332 ElementCount NumElts = VT.getVectorElementCount();
26333 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26334 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26335 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26336 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26337 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26338 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26339 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26340 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26341 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26342 NumElts.divideCoefficientBy(Scale));
26343 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26344 }
26345 }
26346 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26347 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26348 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26349 return DAG.getBitcast(VT, Res);
26350 }
26351 }
26352 }
26353
26354 // Canonicalize insert_subvector dag nodes.
26355 // Example:
26356 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26357 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26358 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26359 N1.getValueType() == N0.getOperand(1).getValueType()) {
26360 unsigned OtherIdx = N0.getConstantOperandVal(2);
26361 if (InsIdx < OtherIdx) {
26362 // Swap nodes.
26363 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26364 N0.getOperand(0), N1, N2);
26365 AddToWorklist(NewOp.getNode());
26366 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26367 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26368 }
26369 }
26370
26371 // If the input vector is a concatenation, and the insert replaces
26372 // one of the pieces, we can optimize into a single concat_vectors.
26373 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26374 N0.getOperand(0).getValueType() == N1.getValueType() &&
26375 N0.getOperand(0).getValueType().isScalableVector() ==
26376 N1.getValueType().isScalableVector()) {
26377 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26378 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26379 Ops[InsIdx / Factor] = N1;
26380 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26381 }
26382
26383 // Simplify source operands based on insertion.
26384 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26385 return SDValue(N, 0);
26386
26387 return SDValue();
26388}
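// Editorial example of pushing subvector bitcasts to the output (V and S are
// placeholder v4i32 and v2i32 values):
//   insert_subvector (bitcast V to v8i16), (bitcast S to v4i16), 4
// uses an i32 source element that is twice as wide as the i16 result element
// (Scale = 2), so the index halves from 4 to 2 and the node becomes
//   bitcast (insert_subvector V, S, 2) to v8i16
// when INSERT_SUBVECTOR is available on v4i32.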
26389
26390SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26391 SDValue N0 = N->getOperand(0);
26392
26393 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26394 if (N0->getOpcode() == ISD::FP16_TO_FP)
26395 return N0->getOperand(0);
26396
26397 return SDValue();
26398}
26399
26400SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26401 auto Op = N->getOpcode();
26403 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26404 SDValue N0 = N->getOperand(0);
26405
26406 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26407 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26408 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26409 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26410 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26411 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26412 }
26413 }
26414
26415 return SDValue();
26416}
26417
26418SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26419 SDValue N0 = N->getOperand(0);
26420
26421 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26422 if (N0->getOpcode() == ISD::BF16_TO_FP)
26423 return N0->getOperand(0);
26424
26425 return SDValue();
26426}
26427
26428SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26429 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26430 return visitFP16_TO_FP(N);
26431}
26432
26433SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26434 SDValue N0 = N->getOperand(0);
26435 EVT VT = N0.getValueType();
26436 unsigned Opcode = N->getOpcode();
26437
26438 // VECREDUCE over 1-element vector is just an extract.
26439 if (VT.getVectorElementCount().isScalar()) {
26440 SDLoc dl(N);
26441 SDValue Res =
26442 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26443 DAG.getVectorIdxConstant(0, dl));
26444 if (Res.getValueType() != N->getValueType(0))
26445 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26446 return Res;
26447 }
26448
26449 // On a boolean vector an and/or reduction is the same as a umin/umax
26450 // reduction. Convert them if the latter is legal while the former isn't.
26451 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26452 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26453 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26454 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26455 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26456 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26457 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26458 }
26459
26460 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26461 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26462 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26463 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26464 SDValue Vec = N0.getOperand(0);
26465 SDValue Subvec = N0.getOperand(1);
26466 if ((Opcode == ISD::VECREDUCE_OR &&
26467 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26468 (Opcode == ISD::VECREDUCE_AND &&
26469 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26470 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26471 }
26472
26473 return SDValue();
26474}
26475
26476SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26477 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26478
26479 // FSUB -> FMA combines:
26480 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26481 AddToWorklist(Fused.getNode());
26482 return Fused;
26483 }
26484 return SDValue();
26485}
26486
26487SDValue DAGCombiner::visitVPOp(SDNode *N) {
26488
26489 if (N->getOpcode() == ISD::VP_GATHER)
26490 if (SDValue SD = visitVPGATHER(N))
26491 return SD;
26492
26493 if (N->getOpcode() == ISD::VP_SCATTER)
26494 if (SDValue SD = visitVPSCATTER(N))
26495 return SD;
26496
26497 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26498 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26499 return SD;
26500
26501 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26502 if (SDValue SD = visitVP_STRIDED_STORE(N))
26503 return SD;
26504
26505 // VP operations in which all vector elements are disabled - either by
26506 // determining that the mask is all false or that the EVL is 0 - can be
26507 // eliminated.
26508 bool AreAllEltsDisabled = false;
26509 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26510 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26511 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26512 AreAllEltsDisabled |=
26513 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26514
26515 // This is the only generic VP combine we support for now.
26516 if (!AreAllEltsDisabled) {
26517 switch (N->getOpcode()) {
26518 case ISD::VP_FADD:
26519 return visitVP_FADD(N);
26520 case ISD::VP_FSUB:
26521 return visitVP_FSUB(N);
26522 case ISD::VP_FMA:
26523 return visitFMA<VPMatchContext>(N);
26524 case ISD::VP_SELECT:
26525 return visitVP_SELECT(N);
26526 }
26527 return SDValue();
26528 }
26529
26530 // Binary operations can be replaced by UNDEF.
26531 if (ISD::isVPBinaryOp(N->getOpcode()))
26532 return DAG.getUNDEF(N->getValueType(0));
26533
26534 // VP Memory operations can be replaced by either the chain (stores) or the
26535 // chain + undef (loads).
26536 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26537 if (MemSD->writeMem())
26538 return MemSD->getChain();
26539 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26540 }
26541
26542 // Reduction operations return the start operand when no elements are active.
26543 if (ISD::isVPReduction(N->getOpcode()))
26544 return N->getOperand(0);
26545
26546 return SDValue();
26547}
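// Editorial examples of the all-elements-disabled path: a vp.add whose EVL is
// the constant 0 (or whose mask is an all-false splat) folds to undef, a
// vp.store folds to its input chain, a vp.load folds to undef plus its input
// chain, and a vp.reduce.* returns its start value.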
26548
26549SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26550 SDValue Chain = N->getOperand(0);
26551 SDValue Ptr = N->getOperand(1);
26552 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26553
26554 // Check if the memory where the FP state is written is used only in a
26555 // single load operation.
26556 LoadSDNode *LdNode = nullptr;
26557 for (auto *U : Ptr->uses()) {
26558 if (U == N)
26559 continue;
26560 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26561 if (LdNode && LdNode != Ld)
26562 return SDValue();
26563 LdNode = Ld;
26564 continue;
26565 }
26566 return SDValue();
26567 }
26568 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26569 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26570 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26571 return SDValue();
26572
26573 // Check if the loaded value is used only in a store operation.
26574 StoreSDNode *StNode = nullptr;
26575 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26576 SDUse &U = I.getUse();
26577 if (U.getResNo() == 0) {
26578 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26579 if (StNode)
26580 return SDValue();
26581 StNode = St;
26582 } else {
26583 return SDValue();
26584 }
26585 }
26586 }
26587 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26588 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26589 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26590 return SDValue();
26591
26592 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26593 // environment.
26594 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26595 StNode->getMemOperand());
26596 CombineTo(StNode, Res, false);
26597 return Res;
26598}
26599
26600SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26601 SDValue Chain = N->getOperand(0);
26602 SDValue Ptr = N->getOperand(1);
26603 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26604
26605 // Check if the address of the FP state is also used only in a store operation.
26606 StoreSDNode *StNode = nullptr;
26607 for (auto *U : Ptr->uses()) {
26608 if (U == N)
26609 continue;
26610 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26611 if (StNode && StNode != St)
26612 return SDValue();
26613 StNode = St;
26614 continue;
26615 }
26616 return SDValue();
26617 }
26618 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26619 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26620 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26621 return SDValue();
26622
26623 // Check if the stored value is loaded from some location and the loaded
26624 // value is used only in the store operation.
26625 SDValue StValue = StNode->getValue();
26626 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26627 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26628 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26629 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26630 return SDValue();
26631
26632 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26633 // environment.
26634 SDValue Res =
26635 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26636 LdNode->getMemOperand());
26637 return Res;
26638}
26639
26640/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
26641/// with the destination vector and a zero vector.
26642/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26643/// vector_shuffle V, Zero, <0, 4, 2, 4>
26644SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26645 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26646
26647 EVT VT = N->getValueType(0);
26648 SDValue LHS = N->getOperand(0);
26649 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26650 SDLoc DL(N);
26651
26652 // Make sure we're not running after operation legalization where it
26653 // may have custom lowered the vector shuffles.
26654 if (LegalOperations)
26655 return SDValue();
26656
26657 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26658 return SDValue();
26659
26660 EVT RVT = RHS.getValueType();
26661 unsigned NumElts = RHS.getNumOperands();
26662
26663 // Attempt to create a valid clear mask, splitting the mask into
26664 // sub elements and checking to see if each is
26665 // all zeros or all ones - suitable for shuffle masking.
26666 auto BuildClearMask = [&](int Split) {
26667 int NumSubElts = NumElts * Split;
26668 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26669
26670 SmallVector<int, 8> Indices;
26671 for (int i = 0; i != NumSubElts; ++i) {
26672 int EltIdx = i / Split;
26673 int SubIdx = i % Split;
26674 SDValue Elt = RHS.getOperand(EltIdx);
26675 // X & undef --> 0 (not undef). So this lane must be converted to choose
26676 // from the zero constant vector (same as if the element had all 0-bits).
26677 if (Elt.isUndef()) {
26678 Indices.push_back(i + NumSubElts);
26679 continue;
26680 }
26681
26682 APInt Bits;
26683 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26684 Bits = Cst->getAPIntValue();
26685 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26686 Bits = CstFP->getValueAPF().bitcastToAPInt();
26687 else
26688 return SDValue();
26689
26690 // Extract the sub element from the constant bit mask.
26691 if (DAG.getDataLayout().isBigEndian())
26692 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26693 else
26694 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26695
26696 if (Bits.isAllOnes())
26697 Indices.push_back(i);
26698 else if (Bits == 0)
26699 Indices.push_back(i + NumSubElts);
26700 else
26701 return SDValue();
26702 }
26703
26704 // Let's see if the target supports this vector_shuffle.
26705 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26706 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26707 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26708 return SDValue();
26709
26710 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26711 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26712 DAG.getBitcast(ClearVT, LHS),
26713 Zero, Indices));
26714 };
26715
26716 // Determine maximum split level (byte level masking).
26717 int MaxSplit = 1;
26718 if (RVT.getScalarSizeInBits() % 8 == 0)
26719 MaxSplit = RVT.getScalarSizeInBits() / 8;
26720
26721 for (int Split = 1; Split <= MaxSplit; ++Split)
26722 if (RVT.getScalarSizeInBits() % Split == 0)
26723 if (SDValue S = BuildClearMask(Split))
26724 return S;
26725
26726 return SDValue();
26727}
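// Editorial worked example (V is a placeholder v2i32 value, little-endian):
//   and V, <0x0000ffff, 0xffffffff>
// has no valid per-i32 clear mask (0x0000ffff is neither all ones nor zero),
// but splitting into i16 sub-elements gives <all-ones, zero, all-ones,
// all-ones>, so the fold emits
//   bitcast (shuffle (bitcast V to v4i16), zero, <0,5,2,3>) to v2i32
// when the target reports that clear mask as legal.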
26728
26729/// If a vector binop is performed on splat values, it may be profitable to
26730/// extract, scalarize, and insert/splat.
26731static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26732 const SDLoc &DL) {
26733 SDValue N0 = N->getOperand(0);
26734 SDValue N1 = N->getOperand(1);
26735 unsigned Opcode = N->getOpcode();
26736 EVT VT = N->getValueType(0);
26737 EVT EltVT = VT.getVectorElementType();
26738 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26739
26740 // TODO: Remove/replace the extract cost check? If the elements are available
26741 // as scalars, then there may be no extract cost. Should we ask if
26742 // inserting a scalar back into a vector is cheap instead?
26743 int Index0, Index1;
26744 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26745 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26746 // Extract element from splat_vector should be free.
26747 // TODO: use DAG.isSplatValue instead?
26748 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26749 N1.getOpcode() == ISD::SPLAT_VECTOR;
26750 if (!Src0 || !Src1 || Index0 != Index1 ||
26751 Src0.getValueType().getVectorElementType() != EltVT ||
26752 Src1.getValueType().getVectorElementType() != EltVT ||
26753 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26754 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26755 return SDValue();
26756
26757 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26758 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26759 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26760 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26761
26762 // If all lanes but 1 are undefined, no need to splat the scalar result.
26763 // TODO: Keep track of undefs and use that info in the general case.
26764 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26765 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26766 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26767 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26768 // build_vec ..undef, (bo X, Y), undef...
26769 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
26770 Ops[Index0] = ScalarBO;
26771 return DAG.getBuildVector(VT, DL, Ops);
26772 }
26773
26774 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26775 return DAG.getSplat(VT, DL, ScalarBO);
26776}
26777
26778/// Visit a vector cast operation, like FP_EXTEND.
26779SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26780 EVT VT = N->getValueType(0);
26781 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26782 EVT EltVT = VT.getVectorElementType();
26783 unsigned Opcode = N->getOpcode();
26784
26785 SDValue N0 = N->getOperand(0);
26786 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26787
26788 // TODO: promote operation might be also good here?
26789 int Index0;
26790 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26791 if (Src0 &&
26792 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26793 TLI.isExtractVecEltCheap(VT, Index0)) &&
26794 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26795 TLI.preferScalarizeSplat(N)) {
26796 EVT SrcVT = N0.getValueType();
26797 EVT SrcEltVT = SrcVT.getVectorElementType();
26798 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26799 SDValue Elt =
26800 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26801 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26802 if (VT.isScalableVector())
26803 return DAG.getSplatVector(VT, DL, ScalarBO);
26804 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26805 return DAG.getBuildVector(VT, DL, Ops);
26806 }
26807
26808 return SDValue();
26809}
26810
26811/// Visit a binary vector operation, like ADD.
26812SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26813 EVT VT = N->getValueType(0);
26814 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26815
26816 SDValue LHS = N->getOperand(0);
26817 SDValue RHS = N->getOperand(1);
26818 unsigned Opcode = N->getOpcode();
26819 SDNodeFlags Flags = N->getFlags();
26820
26821 // Move unary shuffles with identical masks after a vector binop:
26822 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
26823 // --> shuffle (VBinOp A, B), Undef, Mask
26824 // This does not require type legality checks because we are creating the
26825 // same types of operations that are in the original sequence. We do have to
26826 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
26827 // though. This code is adapted from the identical transform in instcombine.
26828 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26829 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26830 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26831 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26832 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26833 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26834 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26835 RHS.getOperand(0), Flags);
26836 SDValue UndefV = LHS.getOperand(1);
26837 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26838 }
26839
26840 // Try to sink a splat shuffle after a binop with a uniform constant.
26841 // This is limited to cases where neither the shuffle nor the constant have
26842 // undefined elements because that could be poison-unsafe or inhibit
26843 // demanded elements analysis. It is further limited to not change a splat
26844 // of an inserted scalar because that may be optimized better by
26845 // load-folding or other target-specific behaviors.
26846 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26847 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26848 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26849 // binop (splat X), (splat C) --> splat (binop X, C)
26850 SDValue X = Shuf0->getOperand(0);
26851 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26852 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26853 Shuf0->getMask());
26854 }
26855 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26856 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26857 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26858 // binop (splat C), (splat X) --> splat (binop C, X)
26859 SDValue X = Shuf1->getOperand(0);
26860 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26861 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26862 Shuf1->getMask());
26863 }
26864 }
26865
26866 // The following pattern is likely to emerge with vector reduction ops. Moving
26867 // the binary operation ahead of insertion may allow using a narrower vector
26868 // instruction that has better performance than the wide version of the op:
26869 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
26870 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26871 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26872 LHS.getOperand(2) == RHS.getOperand(2) &&
26873 (LHS.hasOneUse() || RHS.hasOneUse())) {
26874 SDValue X = LHS.getOperand(1);
26875 SDValue Y = RHS.getOperand(1);
26876 SDValue Z = LHS.getOperand(2);
26877 EVT NarrowVT = X.getValueType();
26878 if (NarrowVT == Y.getValueType() &&
26879 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26880 LegalOperations)) {
26881 // (binop undef, undef) may not return undef, so compute that result.
26882 SDValue VecC =
26883 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26884 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26885 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26886 }
26887 }
26888
26889 // Make sure all but the first op are undef or constant.
26890 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26891 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26892 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26893 return Op.isUndef() ||
26894 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26895 });
26896 };
26897
26898 // The following pattern is likely to emerge with vector reduction ops. Moving
26899 // the binary operation ahead of the concat may allow using a narrower vector
26900 // instruction that has better performance than the wide version of the op:
26901 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26902 // concat (VBinOp X, Y), VecC
26903 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26904 (LHS.hasOneUse() || RHS.hasOneUse())) {
26905 EVT NarrowVT = LHS.getOperand(0).getValueType();
26906 if (NarrowVT == RHS.getOperand(0).getValueType() &&
26907 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26908 unsigned NumOperands = LHS.getNumOperands();
26909 SmallVector<SDValue, 4> ConcatOps;
26910 for (unsigned i = 0; i != NumOperands; ++i) {
26911 // This constant-folds for operands 1 and up.
26912 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26913 RHS.getOperand(i)));
26914 }
26915
26916 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26917 }
26918 }
26919
26920 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26921 return V;
26922
26923 return SDValue();
26924}
26925
26926SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26927 SDValue N2) {
26928 assert(N0.getOpcode() == ISD::SETCC &&
26929 "First argument must be a SetCC node!");
26930
26931 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26932 cast<CondCodeSDNode>(N0.getOperand(2))->get());
26933
26934 // If we got a simplified select_cc node back from SimplifySelectCC, then
26935 // break it down into a new SETCC node, and a new SELECT node, and then return
26936 // the SELECT node, since we were called with a SELECT node.
26937 if (SCC.getNode()) {
26938 // Check to see if we got a select_cc back (to turn into setcc/select).
26939 // Otherwise, just return whatever node we got back, like fabs.
26940 if (SCC.getOpcode() == ISD::SELECT_CC) {
26941 const SDNodeFlags Flags = N0->getFlags();
26942 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26943 N0.getValueType(),
26944 SCC.getOperand(0), SCC.getOperand(1),
26945 SCC.getOperand(4), Flags);
26946 AddToWorklist(SETCC.getNode());
26947 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26948 SCC.getOperand(2), SCC.getOperand(3));
26949 SelectNode->setFlags(Flags);
26950 return SelectNode;
26951 }
26952
26953 return SCC;
26954 }
26955 return SDValue();
26956}
26957
26958/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26959/// being selected between, see if we can simplify the select. Callers of this
26960/// should assume that TheSelect is deleted if this returns true. As such, they
26961/// should return the appropriate thing (e.g. the node) back to the top-level of
26962/// the DAG combiner loop to avoid it being looked at.
26963bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26964 SDValue RHS) {
26965 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26966 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26967 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26968 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26969 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26970 SDValue Sqrt = RHS;
26971 ISD::CondCode CC = ISD::SETCC_INVALID;
26972 SDValue CmpLHS;
26973 const ConstantFPSDNode *Zero = nullptr;
26974
26975 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26976 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26977 CmpLHS = TheSelect->getOperand(0);
26978 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26979 } else {
26980 // SELECT or VSELECT
26981 SDValue Cmp = TheSelect->getOperand(0);
26982 if (Cmp.getOpcode() == ISD::SETCC) {
26983 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26984 CmpLHS = Cmp.getOperand(0);
26985 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
26986 }
26987 }
26988 if (Zero && Zero->isZero() &&
26989 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
26990 CC == ISD::SETULT || CC == ISD::SETLT)) {
26991 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26992 CombineTo(TheSelect, Sqrt);
26993 return true;
26994 }
26995 }
26996 }
26997 // Cannot simplify select with vector condition
26998 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
26999
27000 // If this is a select from two identical things, try to pull the operation
27001 // through the select.
27002 if (LHS.getOpcode() != RHS.getOpcode() ||
27003 !LHS.hasOneUse() || !RHS.hasOneUse())
27004 return false;
27005
27006 // If this is a load and the token chain is identical, replace the select
27007 // of two loads with a load through a select of the address to load from.
27008 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27009 // constants have been dropped into the constant pool.
27010 if (LHS.getOpcode() == ISD::LOAD) {
27011 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27012 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27013
27014 // Token chains must be identical.
27015 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27016 // Do not let this transformation reduce the number of volatile loads.
27017 // Be conservative for atomics for the moment
27018 // TODO: This does appear to be legal for unordered atomics (see D66309)
27019 !LLD->isSimple() || !RLD->isSimple() ||
27020 // FIXME: If either is a pre/post inc/dec load,
27021 // we'd need to split out the address adjustment.
27022 LLD->isIndexed() || RLD->isIndexed() ||
27023 // If this is an EXTLOAD, the VT's must match.
27024 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27025 // If this is an EXTLOAD, the kind of extension must match.
27026 (LLD->getExtensionType() != RLD->getExtensionType() &&
27027 // The only exception is if one of the extensions is anyext.
27028 LLD->getExtensionType() != ISD::EXTLOAD &&
27029 RLD->getExtensionType() != ISD::EXTLOAD) ||
27030 // FIXME: this discards src value information. This is
27031 // over-conservative. It would be beneficial to be able to remember
27032 // both potential memory locations. Since we are discarding
27033 // src value info, don't do the transformation if the memory
27034 // locations are not in the default address space.
27035 LLD->getPointerInfo().getAddrSpace() != 0 ||
27036 RLD->getPointerInfo().getAddrSpace() != 0 ||
27037 // We can't produce a CMOV of a TargetFrameIndex since we won't
27038 // generate the address generation required.
27039 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27040 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27041 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27042 LLD->getBasePtr().getValueType()))
27043 return false;
27044
27045 // The loads must not depend on one another.
27046 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27047 return false;
27048
27049 // Check that the select condition doesn't reach either load. If so,
27050 // folding this will induce a cycle into the DAG. If not, this is safe to
27051 // xform, so create a select of the addresses.
27052
27053 SmallPtrSet<const SDNode *, 32> Visited;
27054 SmallVector<const SDNode *, 16> Worklist;
27055
27056 // Always fail if LLD and RLD are not independent. TheSelect is a
27057 // predecessor to all Nodes in question so we need not search past it.
27058
27059 Visited.insert(TheSelect);
27060 Worklist.push_back(LLD);
27061 Worklist.push_back(RLD);
27062
27063 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27064 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27065 return false;
27066
27067 SDValue Addr;
27068 if (TheSelect->getOpcode() == ISD::SELECT) {
27069 // We cannot do this optimization if any pair of {RLD, LLD} is a
27070 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27071 // Loads, we only need to check if CondNode is a successor to one of the
27072 // loads. We can further avoid this if there's no use of their chain
27073 // value.
27074 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27075 Worklist.push_back(CondNode);
27076
27077 if ((LLD->hasAnyUseOfValue(1) &&
27078 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27079 (RLD->hasAnyUseOfValue(1) &&
27080 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27081 return false;
27082
27083 Addr = DAG.getSelect(SDLoc(TheSelect),
27084 LLD->getBasePtr().getValueType(),
27085 TheSelect->getOperand(0), LLD->getBasePtr(),
27086 RLD->getBasePtr());
27087 } else { // Otherwise SELECT_CC
27088 // We cannot do this optimization if any pair of {RLD, LLD} is a
27089 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27090 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27091 // one of the loads. We can further avoid this if there's no use of their
27092 // chain value.
27093
27094 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27095 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27096 Worklist.push_back(CondLHS);
27097 Worklist.push_back(CondRHS);
27098
27099 if ((LLD->hasAnyUseOfValue(1) &&
27100 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27101 (RLD->hasAnyUseOfValue(1) &&
27102 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27103 return false;
27104
27105 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27106 LLD->getBasePtr().getValueType(),
27107 TheSelect->getOperand(0),
27108 TheSelect->getOperand(1),
27109 LLD->getBasePtr(), RLD->getBasePtr(),
27110 TheSelect->getOperand(4));
27111 }
27112
27113 SDValue Load;
27114 // It is safe to replace the two loads if they have different alignments,
27115 // but the new load must be the minimum (most restrictive) alignment of the
27116 // inputs.
27117 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27118 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27119 if (!RLD->isInvariant())
27120 MMOFlags &= ~MachineMemOperand::MOInvariant;
27121 if (!RLD->isDereferenceable())
27122 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27123 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27124 // FIXME: Discards pointer and AA info.
27125 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27126 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27127 MMOFlags);
27128 } else {
27129 // FIXME: Discards pointer and AA info.
27130 Load = DAG.getExtLoad(
27131 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27132 : LLD->getExtensionType(),
27133 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27134 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27135 }
27136
27137 // Users of the select now use the result of the load.
27138 CombineTo(TheSelect, Load);
27139
27140 // Users of the old loads now use the new load's chain. We know the
27141 // old-load value is dead now.
27142 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27143 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27144 return true;
27145 }
27146
27147 return false;
27148}
27149
27150/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27151/// bitwise 'and'.
27152SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27153 SDValue N1, SDValue N2, SDValue N3,
27154 ISD::CondCode CC) {
27155 // If this is a select where the false operand is zero and the compare is a
27156 // check of the sign bit, see if we can perform the "gzip trick":
27157 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27158 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
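 // Worked example (illustration only): for i32 X in the setlt form,
 // (sra X, 31) is all-ones when X < 0 and zero otherwise, so the final AND
 // yields A exactly when the select condition is true and 0 otherwise.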
27159 EVT XType = N0.getValueType();
27160 EVT AType = N2.getValueType();
27161 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27162 return SDValue();
27163
27164 // If the comparison is testing for a positive value, we have to invert
27165 // the sign bit mask, so only do that transform if the target has a bitwise
27166 // 'and not' instruction (the invert is free).
27167 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27168 // (X > -1) ? A : 0
27169 // (X > 0) ? X : 0 <-- This is canonical signed max.
27170 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27171 return SDValue();
27172 } else if (CC == ISD::SETLT) {
27173 // (X < 0) ? A : 0
27174 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27175 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27176 return SDValue();
27177 } else {
27178 return SDValue();
27179 }
27180
27181 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27182 // constant.
27183 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27184 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27185 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27186 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27187 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27188 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27189 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27190 AddToWorklist(Shift.getNode());
27191
27192 if (XType.bitsGT(AType)) {
27193 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27194 AddToWorklist(Shift.getNode());
27195 }
27196
27197 if (CC == ISD::SETGT)
27198 Shift = DAG.getNOT(DL, Shift, AType);
27199
27200 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27201 }
27202 }
27203
27204 unsigned ShCt = XType.getSizeInBits() - 1;
27205 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27206 return SDValue();
27207
27208 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27209 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27210 AddToWorklist(Shift.getNode());
27211
27212 if (XType.bitsGT(AType)) {
27213 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27214 AddToWorklist(Shift.getNode());
27215 }
27216
27217 if (CC == ISD::SETGT)
27218 Shift = DAG.getNOT(DL, Shift, AType);
27219
27220 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27221}
27222
27223// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27224SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27225 SDValue N0 = N->getOperand(0);
27226 SDValue N1 = N->getOperand(1);
27227 SDValue N2 = N->getOperand(2);
27228 SDLoc DL(N);
27229
27230 unsigned BinOpc = N1.getOpcode();
27231 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27232 (N1.getResNo() != N2.getResNo()))
27233 return SDValue();
27234
27235 // The use checks are intentionally on SDNode because we may be dealing
27236 // with opcodes that produce more than one SDValue.
27237 // TODO: Do we really need to check N0 (the condition operand of the select)?
27238 // But removing that clause could cause an infinite loop...
27239 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27240 return SDValue();
27241
27242 // Binops may include opcodes that return multiple values, so all values
27243 // must be created/propagated from the newly created binops below.
27244 SDVTList OpVTs = N1->getVTList();
27245
27246 // Fold select(cond, binop(x, y), binop(z, y))
27247 // --> binop(select(cond, x, z), y)
27248 if (N1.getOperand(1) == N2.getOperand(1)) {
27249 SDValue N10 = N1.getOperand(0);
27250 SDValue N20 = N2.getOperand(0);
27251 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27252 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27253 NewBinOp->setFlags(N1->getFlags());
27254 NewBinOp->intersectFlagsWith(N2->getFlags());
27255 return SDValue(NewBinOp.getNode(), N1.getResNo());
27256 }
27257
27258 // Fold select(cond, binop(x, y), binop(x, z))
27259 // --> binop(x, select(cond, y, z))
27260 if (N1.getOperand(0) == N2.getOperand(0)) {
27261 SDValue N11 = N1.getOperand(1);
27262 SDValue N21 = N2.getOperand(1);
27263 // Second op VT might be different (e.g. shift amount type)
27264 if (N11.getValueType() == N21.getValueType()) {
27265 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27266 SDValue NewBinOp =
27267 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27268 NewBinOp->setFlags(N1->getFlags());
27269 NewBinOp->intersectFlagsWith(N2->getFlags());
27270 return SDValue(NewBinOp.getNode(), N1.getResNo());
27271 }
27272 }
27273
27274 // TODO: Handle isCommutativeBinOp patterns as well?
27275 return SDValue();
27276}
27277
27278// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27279SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27280 SDValue N0 = N->getOperand(0);
27281 EVT VT = N->getValueType(0);
27282 bool IsFabs = N->getOpcode() == ISD::FABS;
27283 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27284
27285 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27286 return SDValue();
27287
27288 SDValue Int = N0.getOperand(0);
27289 EVT IntVT = Int.getValueType();
27290
27291 // The operand to cast should be integer.
27292 if (!IntVT.isInteger() || IntVT.isVector())
27293 return SDValue();
27294
27295 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27296 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
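 // Concrete illustration (assuming an f32 value bitcast from i32): fneg
 // becomes "xor x, 0x80000000" and fabs becomes "and x, 0x7fffffff".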
27297 APInt SignMask;
27298 if (N0.getValueType().isVector()) {
27299 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27300 // 0x7f...) per element and splat it.
27301 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27302 if (IsFabs)
27303 SignMask = ~SignMask;
27304 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27305 } else {
27306 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27307 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27308 if (IsFabs)
27309 SignMask = ~SignMask;
27310 }
27311 SDLoc DL(N0);
27312 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27313 DAG.getConstant(SignMask, DL, IntVT));
27314 AddToWorklist(Int.getNode());
27315 return DAG.getBitcast(VT, Int);
27316}
27317
27318/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27319/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27320/// in it. This may be a win when the constant is not otherwise available
27321/// because it replaces two constant pool loads with one.
27322SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27323 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27324 ISD::CondCode CC) {
27325 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27326 return SDValue();
27327
27328 // If we are before legalize types, we want the other legalization to happen
27329 // first (for example, to avoid messing with soft float).
27330 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27331 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27332 EVT VT = N2.getValueType();
27333 if (!TV || !FV || !TLI.isTypeLegal(VT))
27334 return SDValue();
27335
27336 // If a constant can be materialized without loads, this does not make sense.
27337 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27338 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27339 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27340 return SDValue();
27341
27342 // If both constants have multiple uses, then we won't need to do an extra
27343 // load. The values are likely around in registers for other users.
27344 if (!TV->hasOneUse() && !FV->hasOneUse())
27345 return SDValue();
27346
27347 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27348 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27349 Type *FPTy = Elts[0]->getType();
27350 const DataLayout &TD = DAG.getDataLayout();
27351
27352 // Create a ConstantArray of the two constants.
27353 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27354 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27355 TD.getPrefTypeAlign(FPTy));
27356 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27357
27358 // Get offsets to the 0 and 1 elements of the array, so we can select between
27359 // them.
27360 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27361 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27362 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27363 SDValue Cond =
27364 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27365 AddToWorklist(Cond.getNode());
27366 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27367 AddToWorklist(CstOffset.getNode());
27368 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27369 AddToWorklist(CPIdx.getNode());
27370 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27371 MachinePointerInfo::getConstantPool(
27372 DAG.getMachineFunction()), Alignment);
27373}
27374
27375/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27376/// where 'cond' is the comparison specified by CC.
27377SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27378 SDValue N2, SDValue N3, ISD::CondCode CC,
27379 bool NotExtCompare) {
27380 // (x ? y : y) -> y.
27381 if (N2 == N3) return N2;
27382
27383 EVT CmpOpVT = N0.getValueType();
27384 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27385 EVT VT = N2.getValueType();
27386 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27387 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27388 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27389
27390 // Determine if the condition we're dealing with is constant.
27391 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27392 AddToWorklist(SCC.getNode());
27393 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27394 // fold select_cc true, x, y -> x
27395 // fold select_cc false, x, y -> y
27396 return !(SCCC->isZero()) ? N2 : N3;
27397 }
27398 }
27399
27400 if (SDValue V =
27401 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27402 return V;
27403
27404 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27405 return V;
27406
27407 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27408 // where y has a single bit set.
27409 // A plaintext description would be, we can turn the SELECT_CC into an AND
27410 // when the condition can be materialized as an all-ones register. Any
27411 // single bit-test can be materialized as an all-ones register with
27412 // shift-left and shift-right-arith.
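 // Worked example (illustration): for i32 x and y == 0x8, the tested bit 3 is
 // shifted left by 28 into the sign bit and then arithmetic-shifted right by
 // 31, giving all-ones when the bit is set and zero otherwise; ANDing that
 // mask with A gives A or 0, matching the original select.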
27413 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27414 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27415 SDValue AndLHS = N0->getOperand(0);
27416 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27417 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27418 // Shift the tested bit over the sign bit.
27419 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27420 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27421 unsigned ShCt = AndMask.getBitWidth() - 1;
27422 SDValue ShlAmt =
27423 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27424 getShiftAmountTy(AndLHS.getValueType()));
27425 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27426
27427 // Now arithmetic right shift it all the way over, so the result is
27428 // either all-ones, or zero.
27429 SDValue ShrAmt =
27430 DAG.getConstant(ShCt, SDLoc(Shl),
27431 getShiftAmountTy(Shl.getValueType()));
27432 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27433
27434 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27435 }
27436 }
27437 }
27438
27439 // fold select C, 16, 0 -> shl C, 4
27440 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27441 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27442
27443 if ((Fold || Swap) &&
27444 TLI.getBooleanContents(CmpOpVT) ==
27445 TargetLowering::ZeroOrOneBooleanContent &&
27446 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27447
27448 if (Swap) {
27449 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27450 std::swap(N2C, N3C);
27451 }
27452
27453 // If the caller doesn't want us to simplify this into a zext of a compare,
27454 // don't do it.
27455 if (NotExtCompare && N2C->isOne())
27456 return SDValue();
27457
27458 SDValue Temp, SCC;
27459 // zext (setcc n0, n1)
27460 if (LegalTypes) {
27461 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27462 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27463 } else {
27464 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27465 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27466 }
27467
27468 AddToWorklist(SCC.getNode());
27469 AddToWorklist(Temp.getNode());
27470
27471 if (N2C->isOne())
27472 return Temp;
27473
27474 unsigned ShCt = N2C->getAPIntValue().logBase2();
27475 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27476 return SDValue();
27477
27478 // shl setcc result by log2 n2c
27479 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27480 DAG.getConstant(ShCt, SDLoc(Temp),
27481 getShiftAmountTy(Temp.getValueType())));
27482 }
27483
27484 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27485 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27486 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27487 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27488 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27489 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27490 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27491 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27492 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27493 SDValue ValueOnZero = N2;
27494 SDValue Count = N3;
27495 // If the condition is NE instead of E, swap the operands.
27496 if (CC == ISD::SETNE)
27497 std::swap(ValueOnZero, Count);
27498 // Check if the value on zero is a constant equal to the bits in the type.
27499 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27500 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27501 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27502 // legal, combine to just cttz.
27503 if ((Count.getOpcode() == ISD::CTTZ ||
27504 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27505 N0 == Count.getOperand(0) &&
27506 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27507 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27508 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27509 // legal, combine to just ctlz.
27510 if ((Count.getOpcode() == ISD::CTLZ ||
27511 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27512 N0 == Count.getOperand(0) &&
27513 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27514 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27515 }
27516 }
27517 }
27518
27519 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27520 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27521 if (!NotExtCompare && N1C && N2C && N3C &&
27522 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27523 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27524 (N1C->isZero() && CC == ISD::SETLT)) &&
27525 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27526 SDValue ASR = DAG.getNode(
27527 ISD::SRA, DL, CmpOpVT, N0,
27528 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27529 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27530 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27531 }
27532
27533 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27534 return S;
27535 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27536 return S;
27537
27538 return SDValue();
27539}
27540
27541/// This is a stub for TargetLowering::SimplifySetCC.
27542SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27543 ISD::CondCode Cond, const SDLoc &DL,
27544 bool foldBooleans) {
27545 TargetLowering::DAGCombinerInfo
27546 DagCombineInfo(DAG, Level, false, this);
27547 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27548}
27549
27550/// Given an ISD::SDIV node expressing a divide by constant, return
27551/// a DAG expression to select that will generate the same value by multiplying
27552/// by a magic number.
27553/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
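/// For illustration (standard values from Hacker's Delight, not specific to
/// this file): a 32-bit "sdiv X, 3" becomes roughly a high multiply by
/// 0x55555556 plus a small correction for negative dividends, avoiding a
/// hardware divide.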
27554SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27555 // when optimising for minimum size, we don't want to expand a div to a mul
27556 // and a shift.
27557 if (DAG.getMachineFunction().getFunction().hasMinSize())
27558 return SDValue();
27559
27560 SmallVector<SDNode *, 8> Built;
27561 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27562 for (SDNode *N : Built)
27563 AddToWorklist(N);
27564 return S;
27565 }
27566
27567 return SDValue();
27568}
27569
27570/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27571/// DAG expression that will generate the same value by right shifting.
27572SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27573 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27574 if (!C)
27575 return SDValue();
27576
27577 // Avoid division by zero.
27578 if (C->isZero())
27579 return SDValue();
27580
27581 SmallVector<SDNode *, 8> Built;
27582 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27583 for (SDNode *N : Built)
27584 AddToWorklist(N);
27585 return S;
27586 }
27587
27588 return SDValue();
27589}
27590
27591/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27592/// expression that will generate the same value by multiplying by a magic
27593/// number.
27594/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27595SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27596 // when optimising for minimum size, we don't want to expand a div to a mul
27597 // and a shift.
27598 if (DAG.getMachineFunction().getFunction().hasMinSize())
27599 return SDValue();
27600
27601 SmallVector<SDNode *, 8> Built;
27602 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27603 for (SDNode *N : Built)
27604 AddToWorklist(N);
27605 return S;
27606 }
27607
27608 return SDValue();
27609}
27610
27611/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27612/// return a DAG expression that will generate the same value.
27613SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27614 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27615 if (!C)
27616 return SDValue();
27617
27618 // Avoid division by zero.
27619 if (C->isZero())
27620 return SDValue();
27621
27622 SmallVector<SDNode *, 8> Built;
27623 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27624 for (SDNode *N : Built)
27625 AddToWorklist(N);
27626 return S;
27627 }
27628
27629 return SDValue();
27630}
27631
27632// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27633//
27634// Returns the node that represents `Log2(Op)`. This may create a new node. If
27635 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27636//
27637// All nodes will be created at `DL` and the output will be of type `VT`.
27638//
27639// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27640// `AssumeNonZero` if this function should simply assume (not require proving
27641// `Op` is non-zero).
27642static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27643 SDValue Op, unsigned Depth,
27644 bool AssumeNonZero) {
27645 assert(VT.isInteger() && "Only integer types are supported!");
27646
27647 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27648 while (true) {
27649 switch (V.getOpcode()) {
27650 case ISD::TRUNCATE:
27651 case ISD::ZERO_EXTEND:
27652 V = V.getOperand(0);
27653 break;
27654 default:
27655 return V;
27656 }
27657 }
27658 };
27659
27660 if (VT.isScalableVector())
27661 return SDValue();
27662
27663 Op = PeekThroughCastsAndTrunc(Op);
27664
27665 // Helper for determining whether a value is a power-2 constant scalar or a
27666 // vector of such elements.
27667 SmallVector<APInt> Pow2Constants;
27668 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27669 if (C->isZero() || C->isOpaque())
27670 return false;
27671 // TODO: We may also be able to support negative powers of 2 here.
27672 if (C->getAPIntValue().isPowerOf2()) {
27673 Pow2Constants.emplace_back(C->getAPIntValue());
27674 return true;
27675 }
27676 return false;
27677 };
27678
27679 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27680 if (!VT.isVector())
27681 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27682 // We need to create a build vector
27683 SmallVector<SDValue> Log2Ops;
27684 for (const APInt &Pow2 : Pow2Constants)
27685 Log2Ops.emplace_back(
27686 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27687 return DAG.getBuildVector(VT, DL, Log2Ops);
27688 }
27689
27690 if (Depth >= DAG.MaxRecursionDepth)
27691 return SDValue();
27692
27693 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27694 ToCast = PeekThroughCastsAndTrunc(ToCast);
27695 EVT CurVT = ToCast.getValueType();
27696 if (NewVT == CurVT)
27697 return ToCast;
27698
27699 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27700 return DAG.getBitcast(NewVT, ToCast);
27701
27702 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27703 };
27704
27705 // log2(X << Y) -> log2(X) + Y
27706 if (Op.getOpcode() == ISD::SHL) {
27707 // 1 << Y and X nuw/nsw << Y are all non-zero.
27708 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27709 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27710 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27711 Depth + 1, AssumeNonZero))
27712 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27713 CastToVT(VT, Op.getOperand(1)));
27714 }
27715
27716 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27717 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27718 Op.hasOneUse()) {
27719 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27720 Depth + 1, AssumeNonZero))
27721 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27722 Depth + 1, AssumeNonZero))
27723 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27724 }
27725
27726 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27727 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27728 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27729 Op.hasOneUse()) {
27730 // Use AssumeNonZero as false here. Otherwise we can hit case where
27731 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because overflow).
27732 if (SDValue LogX =
27733 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27734 /*AssumeNonZero*/ false))
27735 if (SDValue LogY =
27736 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27737 /*AssumeNonZero*/ false))
27738 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27739 }
27740
27741 return SDValue();
27742}
27743
27744/// Determines the LogBase2 value for a non-null input value using the
27745/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
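/// For example (illustration): with a 32-bit element and V = 16, ctlz(V) is
/// 27, so LogBase2(V) = 31 - 27 = 4.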
27746SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27747 bool KnownNonZero, bool InexpensiveOnly,
27748 std::optional<EVT> OutVT) {
27749 EVT VT = OutVT ? *OutVT : V.getValueType();
27750 SDValue InexpensiveLogBase2 =
27751 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27752 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27753 return InexpensiveLogBase2;
27754
27755 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27756 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27757 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27758 return LogBase2;
27759}
27760
27761/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27762/// For the reciprocal, we need to find the zero of the function:
27763/// F(X) = 1/X - A [which has a zero at X = 1/A]
27764/// =>
27765/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27766/// does not require additional intermediate precision]
27767/// For the last iteration, put numerator N into it to gain more precision:
27768/// Result = N X_i + X_i (N - N A X_i)
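/// Sketch of one refinement step as emitted below (illustration): with initial
/// estimate X_0, the code forms N*X_0, then N - A*(N*X_0), and finally
/// N*X_0 + X_0*(N - A*N*X_0), which matches the last formula above.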
27769SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27770 SDNodeFlags Flags) {
27771 if (LegalDAG)
27772 return SDValue();
27773
27774 // TODO: Handle extended types?
27775 EVT VT = Op.getValueType();
27776 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27777 VT.getScalarType() != MVT::f64)
27778 return SDValue();
27779
27780 // If estimates are explicitly disabled for this function, we're done.
27781 MachineFunction &MF = DAG.getMachineFunction();
27782 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27783 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27784 return SDValue();
27785
27786 // Estimates may be explicitly enabled for this type with a custom number of
27787 // refinement steps.
27788 int Iterations = TLI.getDivRefinementSteps(VT, MF);
27789 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27790 AddToWorklist(Est.getNode());
27791
27792 SDLoc DL(Op);
27793 if (Iterations) {
27794 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27795
27796 // Newton iterations: Est = Est + Est (N - Arg * Est)
27797 // If this is the last iteration, also multiply by the numerator.
27798 for (int i = 0; i < Iterations; ++i) {
27799 SDValue MulEst = Est;
27800
27801 if (i == Iterations - 1) {
27802 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27803 AddToWorklist(MulEst.getNode());
27804 }
27805
27806 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27807 AddToWorklist(NewEst.getNode());
27808
27809 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27810 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27811 AddToWorklist(NewEst.getNode());
27812
27813 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27814 AddToWorklist(NewEst.getNode());
27815
27816 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27817 AddToWorklist(Est.getNode());
27818 }
27819 } else {
27820 // If no iterations are available, multiply with N.
27821 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27822 AddToWorklist(Est.getNode());
27823 }
27824
27825 return Est;
27826 }
27827
27828 return SDValue();
27829}
27830
27831/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27832/// For the reciprocal sqrt, we need to find the zero of the function:
27833/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27834/// =>
27835/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27836/// As a result, we precompute A/2 prior to the iteration loop.
27837SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27838 unsigned Iterations,
27839 SDNodeFlags Flags, bool Reciprocal) {
27840 EVT VT = Arg.getValueType();
27841 SDLoc DL(Arg);
27842 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27843
27844 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27845 // this entire sequence requires only one FP constant.
27846 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27847 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27848
27849 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27850 for (unsigned i = 0; i < Iterations; ++i) {
27851 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27852 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27853 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27854 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27855 }
27856
27857 // If non-reciprocal square root is requested, multiply the result by Arg.
27858 if (!Reciprocal)
27859 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27860
27861 return Est;
27862}
27863
27864/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27865/// For the reciprocal sqrt, we need to find the zero of the function:
27866/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27867/// =>
27868/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
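/// Note (illustration): this is algebraically the same update as the one-const
/// variant above, since (-0.5 * X_i) * (A * X_i^2 - 3.0) =
/// X_i * (1.5 - 0.5 * A * X_i^2), but it is expressed with the two constants
/// -0.5 and -3.0.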
27869SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27870 unsigned Iterations,
27871 SDNodeFlags Flags, bool Reciprocal) {
27872 EVT VT = Arg.getValueType();
27873 SDLoc DL(Arg);
27874 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27875 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27876
27877 // This routine must enter the loop below to work correctly
27878 // when (Reciprocal == false).
27879 assert(Iterations > 0);
27880
27881 // Newton iterations for reciprocal square root:
27882 // E = (E * -0.5) * ((A * E) * E + -3.0)
27883 for (unsigned i = 0; i < Iterations; ++i) {
27884 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27885 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27886 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27887
27888 // When calculating a square root at the last iteration build:
27889 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
27890 // (notice a common subexpression)
27891 SDValue LHS;
27892 if (Reciprocal || (i + 1) < Iterations) {
27893 // RSQRT: LHS = (E * -0.5)
27894 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27895 } else {
27896 // SQRT: LHS = (A * E) * -0.5
27897 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27898 }
27899
27900 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27901 }
27902
27903 return Est;
27904}
27905
27906/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27907/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27908/// Op can be zero.
27909SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27910 bool Reciprocal) {
27911 if (LegalDAG)
27912 return SDValue();
27913
27914 // TODO: Handle extended types?
27915 EVT VT = Op.getValueType();
27916 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27917 VT.getScalarType() != MVT::f64)
27918 return SDValue();
27919
27920 // If estimates are explicitly disabled for this function, we're done.
27921 MachineFunction &MF = DAG.getMachineFunction();
27922 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27923 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27924 return SDValue();
27925
27926 // Estimates may be explicitly enabled for this type with a custom number of
27927 // refinement steps.
27928 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27929
27930 bool UseOneConstNR = false;
27931 if (SDValue Est =
27932 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27933 Reciprocal)) {
27934 AddToWorklist(Est.getNode());
27935
27936 if (Iterations > 0)
27937 Est = UseOneConstNR
27938 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27939 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27940 if (!Reciprocal) {
27941 SDLoc DL(Op);
27942 // Try the target specific test first.
27943 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27944
27945 // The estimate is now completely wrong if the input was exactly 0.0 or
27946 // possibly a denormal. Force the answer to 0.0 or value provided by
27947 // target for those cases.
27948 Est = DAG.getNode(
27949 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27950 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27951 }
27952 return Est;
27953 }
27954
27955 return SDValue();
27956}
27957
27958SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27959 return buildSqrtEstimateImpl(Op, Flags, true);
27960}
27961
27962SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27963 return buildSqrtEstimateImpl(Op, Flags, false);
27964}
27965
27966/// Return true if there is any possibility that the two addresses overlap.
27967bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27968
27969 struct MemUseCharacteristics {
27970 bool IsVolatile;
27971 bool IsAtomic;
27972 SDValue BasePtr;
27973 int64_t Offset;
27974 LocationSize NumBytes;
27975 MachineMemOperand *MMO;
27976 };
27977
27978 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27979 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
27980 int64_t Offset = 0;
27981 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
27982 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
27983 : (LSN->getAddressingMode() == ISD::PRE_DEC)
27984 ? -1 * C->getSExtValue()
27985 : 0;
27986 TypeSize Size = LSN->getMemoryVT().getStoreSize();
27987 return {LSN->isVolatile(), LSN->isAtomic(),
27988 LSN->getBasePtr(), Offset /*base offset*/,
27989 LocationSize::precise(Size), LSN->getMemOperand()};
27990 }
27991 if (const auto *LN = cast<LifetimeSDNode>(N))
27992 return {false /*isVolatile*/,
27993 /*isAtomic*/ false,
27994 LN->getOperand(1),
27995 (LN->hasOffset()) ? LN->getOffset() : 0,
27996 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
27997 : LocationSize::beforeOrAfterPointer(),
27998 (MachineMemOperand *)nullptr};
27999 // Default.
28000 return {false /*isvolatile*/,
28001 /*isAtomic*/ false,
28002 SDValue(),
28003 (int64_t)0 /*offset*/,
28004 LocationSize::beforeOrAfterPointer() /*size*/,
28005 (MachineMemOperand *)nullptr};
28006 };
28007
28008 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28009 MUC1 = getCharacteristics(Op1);
28010
28011 // If they are to the same address, then they must be aliases.
28012 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28013 MUC0.Offset == MUC1.Offset)
28014 return true;
28015
28016 // If they are both volatile then they cannot be reordered.
28017 if (MUC0.IsVolatile && MUC1.IsVolatile)
28018 return true;
28019
28020 // Be conservative about atomics for the moment
28021 // TODO: This is way overconservative for unordered atomics (see D66309)
28022 if (MUC0.IsAtomic && MUC1.IsAtomic)
28023 return true;
28024
28025 if (MUC0.MMO && MUC1.MMO) {
28026 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28027 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28028 return false;
28029 }
28030
28031 // If NumBytes is scalable and offset is not 0, conservatively return may
28032 // alias
28033 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28034 MUC0.Offset != 0) ||
28035 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28036 MUC1.Offset != 0))
28037 return true;
28038 // Try to prove that there is aliasing, or that there is no aliasing. Either
28039 // way, we can return now. If nothing can be proved, proceed with more tests.
28040 bool IsAlias;
28041 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28042 DAG, IsAlias))
28043 return IsAlias;
28044
28045 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28046 // either are not known.
28047 if (!MUC0.MMO || !MUC1.MMO)
28048 return true;
28049
28050 // If one operation reads from invariant memory, and the other may store, they
28051 // cannot alias. These should really be checking the equivalent of mayWrite,
28052 // but it only matters for memory nodes other than load /store.
28053 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28054 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28055 return false;
28056
28057 // If we know required SrcValue1 and SrcValue2 have relatively large
28058 // alignment compared to the size and offset of the access, we may be able
28059 // to prove they do not alias. This check is conservative for now to catch
28060 // cases created by splitting vector types, it only works when the offsets are
28061 // multiples of the size of the data.
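 // Worked example (illustration): two 4-byte accesses with base alignment 8
 // at source offsets 0 and 4 satisfy 0 + 4 <= 4, so they cannot overlap and
 // no alias is reported.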
28062 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28063 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28064 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28065 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28066 LocationSize Size0 = MUC0.NumBytes;
28067 LocationSize Size1 = MUC1.NumBytes;
28068
28069 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28070 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28071 !Size1.isScalable() && Size0 == Size1 &&
28072 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28073 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28074 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28075 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28076 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28077
28078 // There is no overlap between these relatively aligned accesses of
28079 // similar size. Return no alias.
28080 if ((OffAlign0 + static_cast<int64_t>(
28081 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28082 (OffAlign1 + static_cast<int64_t>(
28083 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28084 return false;
28085 }
28086
28087 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28088 ? CombinerGlobalAA
28089 : DAG.getSubtarget().useAA();
28090#ifndef NDEBUG
28091 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28092 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28093 UseAA = false;
28094#endif
28095
28096 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28097 Size0.hasValue() && Size1.hasValue() &&
28098 // Can't represent a scalable size + fixed offset in LocationSize
28099 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28100 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28101 // Use alias analysis information.
28102 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28103 int64_t Overlap0 =
28104 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28105 int64_t Overlap1 =
28106 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28107 LocationSize Loc0 =
28108 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28109 LocationSize Loc1 =
28110 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28111 if (AA->isNoAlias(
28112 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28113 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28114 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28115 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28116 return false;
28117 }
28118
28119 // Otherwise we have to assume they alias.
28120 return true;
28121}
28122
28123/// Walk up chain skipping non-aliasing memory nodes,
28124/// looking for aliasing nodes and adding them to the Aliases vector.
28125void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28126 SmallVectorImpl<SDValue> &Aliases) {
28127 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28128 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28129
28130 // Get alias information for node.
28131 // TODO: relax aliasing for unordered atomics (see D66309)
28132 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28133
28134 // Starting off.
28135 Chains.push_back(OriginalChain);
28136 unsigned Depth = 0;
28137
28138 // Attempt to improve chain by a single step
28139 auto ImproveChain = [&](SDValue &C) -> bool {
28140 switch (C.getOpcode()) {
28141 case ISD::EntryToken:
28142 // No need to mark EntryToken.
28143 C = SDValue();
28144 return true;
28145 case ISD::LOAD:
28146 case ISD::STORE: {
28147 // Get alias information for C.
28148 // TODO: Relax aliasing for unordered atomics (see D66309)
28149 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28150 cast<LSBaseSDNode>(C.getNode())->isSimple();
28151 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28152 // Look further up the chain.
28153 C = C.getOperand(0);
28154 return true;
28155 }
28156 // Alias, so stop here.
28157 return false;
28158 }
28159
28160 case ISD::CopyFromReg:
28161 // Always forward past CopyFromReg.
28162 C = C.getOperand(0);
28163 return true;
28164
28165 case ISD::LIFETIME_START:
28166 case ISD::LIFETIME_END: {
28167 // We can forward past any lifetime start/end that can be proven not to
28168 // alias the memory access.
28169 if (!mayAlias(N, C.getNode())) {
28170 // Look further up the chain.
28171 C = C.getOperand(0);
28172 return true;
28173 }
28174 return false;
28175 }
28176 default:
28177 return false;
28178 }
28179 };
28180
28181 // Look at each chain and determine if it is an alias. If so, add it to the
28182 // aliases list. If not, then continue up the chain looking for the next
28183 // candidate.
28184 while (!Chains.empty()) {
28185 SDValue Chain = Chains.pop_back_val();
28186
28187 // Don't bother if we've seen Chain before.
28188 if (!Visited.insert(Chain.getNode()).second)
28189 continue;
28190
28191 // For TokenFactor nodes, look at each operand and only continue up the
28192 // chain until we reach the depth limit.
28193 //
28194 // FIXME: The depth check could be made to return the last non-aliasing
28195 // chain we found before we hit a tokenfactor rather than the original
28196 // chain.
28197 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28198 Aliases.clear();
28199 Aliases.push_back(OriginalChain);
28200 return;
28201 }
28202
28203 if (Chain.getOpcode() == ISD::TokenFactor) {
28204 // We have to check each of the operands of the token factor for "small"
28205 // token factors, so we queue them up. Adding the operands to the queue
28206 // (stack) in reverse order maintains the original order and increases the
28207 // likelihood that getNode will find a matching token factor (CSE).
28208 if (Chain.getNumOperands() > 16) {
28209 Aliases.push_back(Chain);
28210 continue;
28211 }
28212 for (unsigned n = Chain.getNumOperands(); n;)
28213 Chains.push_back(Chain.getOperand(--n));
28214 ++Depth;
28215 continue;
28216 }
28217 // Everything else
28218 if (ImproveChain(Chain)) {
28219 // Updated Chain Found, Consider new chain if one exists.
28220 if (Chain.getNode())
28221 Chains.push_back(Chain);
28222 ++Depth;
28223 continue;
28224 }
28225 // No Improved Chain Possible, treat as Alias.
28226 Aliases.push_back(Chain);
28227 }
28228}
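// A small illustration (hypothetical nodes): if N is a simple load whose
// chain is TokenFactor(StoreA, StoreB), the walk visits both stores. When
// StoreA can be proven not to alias N, ImproveChain steps past it to
// StoreA's own chain and keeps searching; if StoreB may alias N, it cannot
// be improved and ends up in Aliases. The depth limit above merely bounds
// how far this search may climb before giving up and falling back to the
// original chain.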
28229
28230/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28231/// (aliasing node).
28232SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28233 if (OptLevel == CodeGenOptLevel::None)
28234 return OldChain;
28235
28236 // Ops for replacing token factor.
28237 SmallVector<SDValue, 8> Aliases;
28238
28239 // Accumulate all the aliases to this node.
28240 GatherAllAliases(N, OldChain, Aliases);
28241
28242 // If no operands then chain to entry token.
28243 if (Aliases.empty())
28244 return DAG.getEntryNode();
28245
28246 // If a single operand then chain to it. We don't need to revisit it.
28247 if (Aliases.size() == 1)
28248 return Aliases[0];
28249
28250 // Construct a custom tailored token factor.
28251 return DAG.getTokenFactor(SDLoc(N), Aliases);
28252}
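// Continuing the sketch above: if GatherAllAliases finds nothing, N can be
// chained directly to the entry token; if it finds exactly one node (say
// StoreB), N is simply re-chained to it; only when several potentially
// aliasing nodes remain is a fresh TokenFactor built to join them.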
28253
28254// This function tries to collect a bunch of potentially interesting
28255// nodes to improve the chains of, all at once. This might seem
28256// redundant, as this function gets called when visiting every store
28257// node, so why not let the work be done on each store as it's visited?
28258//
28259// I believe this is mainly important because mergeConsecutiveStores
28260// is unable to deal with merging stores of different sizes, so unless
28261// we improve the chains of all the potential candidates up-front
28262// before running mergeConsecutiveStores, it might only see some of
28263// the nodes that will eventually be candidates, and then not be able
28264// to go from a partially-merged state to the desired final
28265// fully-merged state.
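// As a hypothetical illustration of that concern: if St heads a run of
// adjacent stores of mixed widths and their chains are improved one visit at
// a time, mergeConsecutiveStores may fire while only a prefix of the run is
// reachable through independent chains, merge that prefix, and then have no
// later opportunity to fold the remaining stores into the already-merged
// value.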
28266
28267bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28268 SmallVector<StoreSDNode *, 8> ChainedStores;
28269 StoreSDNode *STChain = St;
28270 // Intervals records which offsets from BaseIndex have been covered. In
28271 // the common case, each store writes to the range immediately preceding the
28272 // one most recently recorded and is therefore merged with that interval at insertion time.
28273
28274 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28275 IntervalMapHalfOpenInfo<int64_t>>;
28276 IMap::Allocator A;
28277 IMap Intervals(A);
28278
28279 // This holds the base pointer, index, and the offset in bytes from the base
28280 // pointer.
28281 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28282
28283 // We must have a base and an offset.
28284 if (!BasePtr.getBase().getNode())
28285 return false;
28286
28287 // Do not handle stores to undef base pointers.
28288 if (BasePtr.getBase().isUndef())
28289 return false;
28290
28291 // Do not handle stores to opaque types
28292 if (St->getMemoryVT().isZeroSized())
28293 return false;
28294
28295 // BaseIndexOffset assumes that offsets are fixed-size, which
28296 // is not valid for scalable vectors where the offsets are
28297 // scaled by `vscale`, so bail out early.
28298 if (St->getMemoryVT().isScalableVT())
28299 return false;
28300
28301 // Add ST's interval.
28302 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28303 std::monostate{});
28304
28305 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28306 if (Chain->getMemoryVT().isScalableVector())
28307 return false;
28308
28309 // If the chain has more than one use, then we can't reorder the mem ops.
28310 if (!SDValue(Chain, 0)->hasOneUse())
28311 break;
28312 // TODO: Relax for unordered atomics (see D66309)
28313 if (!Chain->isSimple() || Chain->isIndexed())
28314 break;
28315
28316 // Find the base pointer and offset for this memory node.
28317 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28318 // Check that the base pointer is the same as the original one.
28319 int64_t Offset;
28320 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28321 break;
28322 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28323 // Make sure we don't overlap with other intervals by checking the ones to
28324 // the left or right before inserting.
28325 auto I = Intervals.find(Offset);
28326 // If there's a next interval, we should end before it.
28327 if (I != Intervals.end() && I.start() < (Offset + Length))
28328 break;
28329 // If there's a previous interval, we should start after it.
28330 if (I != Intervals.begin() && (--I).stop() <= Offset)
28331 break;
28332 Intervals.insert(Offset, Offset + Length, std::monostate{});
28333
28334 ChainedStores.push_back(Chain);
28335 STChain = Chain;
28336 }
28337
28338 // If we didn't find a chained store, exit.
28339 if (ChainedStores.empty())
28340 return false;
28341
28342 // Improve all chained stores (St and ChainedStores members) starting from
28343 // where the store chain ended and return single TokenFactor.
28344 SDValue NewChain = STChain->getChain();
28345 SmallVector<SDValue, 8> TFOps;
28346 for (unsigned I = ChainedStores.size(); I;) {
28347 StoreSDNode *S = ChainedStores[--I];
28348 SDValue BetterChain = FindBetterChain(S, NewChain);
28349 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28350 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28351 TFOps.push_back(SDValue(S, 0));
28352 ChainedStores[I] = S;
28353 }
28354
28355 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28356 SDValue BetterChain = FindBetterChain(St, NewChain);
28357 SDValue NewST;
28358 if (St->isTruncatingStore())
28359 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28360 St->getBasePtr(), St->getMemoryVT(),
28361 St->getMemOperand());
28362 else
28363 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28364 St->getBasePtr(), St->getMemOperand());
28365
28366 TFOps.push_back(NewST);
28367
28368 // If we improved every element of TFOps, then we've lost the dependence on
28369 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28370 // the beginning to keep relative order consistent with FindBetterChains.
28371 auto hasImprovedChain = [&](SDValue ST) -> bool {
28372 return ST->getOperand(0) != NewChain;
28373 };
28374 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28375 if (AddNewChain)
28376 TFOps.insert(TFOps.begin(), NewChain);
28377
28378 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28379 CombineTo(St, TF);
28380
28381 // Add TF and its operands to the worklist.
28382 AddToWorklist(TF.getNode());
28383 for (const SDValue &Op : TF->ops())
28384 AddToWorklist(Op.getNode());
28385 AddToWorklist(STChain);
28386 return true;
28387}
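// The interval bookkeeping above can be restated as a self-contained sketch
// (illustrative only: it uses std::map and hypothetical names in place of
// llvm::IntervalMap, and ignores scalable types). Each accepted store
// contributes a half-open byte range [Offset, Offset + Length) relative to
// the common base, and a chained store is only taken if its range is
// disjoint from every range recorded so far.
#include <cstdint>
#include <iterator>
#include <map>

class DisjointRanges {
  std::map<int64_t, int64_t> Ranges; // start -> end, half-open

public:
  // Record [Start, End) if it is disjoint from everything recorded so far;
  // return whether it was recorded. Because recorded ranges are pairwise
  // disjoint, only the immediate neighbours can possibly overlap.
  bool tryInsert(int64_t Start, int64_t End) {
    auto Next = Ranges.lower_bound(Start); // first range starting >= Start
    if (Next != Ranges.end() && Next->first < End)
      return false; // the following range begins before we end
    if (Next != Ranges.begin() && std::prev(Next)->second > Start)
      return false; // the preceding range ends after we begin
    Ranges.emplace(Start, End);
    return true;
  }
};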
28388
28389bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28390 if (OptLevel == CodeGenOptLevel::None)
28391 return false;
28392
28393 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28394
28395 // We must have a base and an offset.
28396 if (!BasePtr.getBase().getNode())
28397 return false;
28398
28399 // Do not handle stores to undef base pointers.
28400 if (BasePtr.getBase().isUndef())
28401 return false;
28402
28403 // Directly improve a chain of disjoint stores starting at St.
28404 if (parallelizeChainedStores(St))
28405 return true;
28406
28407 // Improve St's chain.
28408 SDValue BetterChain = FindBetterChain(St, St->getChain());
28409 if (St->getChain() != BetterChain) {
28410 replaceStoreChain(St, BetterChain);
28411 return true;
28412 }
28413 return false;
28414}
28415
28416/// This is the entry point for the file.
28417void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28418 CodeGenOptLevel OptLevel) {
28419 /// This is the main entry point to this class.
28420 DAGCombiner(*this, AA, OptLevel).Run(Level);
28421}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition: APFloat.h:1295
bool isNormal() const
Definition: APFloat.h:1299
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1278
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isLargest() const
Definition: APFloat.h:1311
bool isIEEE() const
Definition: APFloat.h:1313
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:444
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
int32_t exactLogBase2() const
Definition: APInt.h:1725
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
unsigned logBase2() const
Definition: APInt.h:1703
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:361
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:551
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
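A minimal usage sketch; the alignment and pointer info here are assumptions, and a real combine would normally carry over the original memory-operand state:
// Sketch: emit a plain store of Val to Ptr on the given chain.
static SDValue emitPlainStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                              SDValue Val, SDValue Ptr) {
  return DAG.getStore(Chain, DL, Val, Ptr, MachinePointerInfo(), Align(4));
}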
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
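A minimal sketch of this helper (the signed-max semantics and the function name are assumptions):
// Sketch: smax(A, B) expressed as a SELECT_CC without building a SETCC first.
static SDValue emitSMax(SelectionDAG &DAG, const SDLoc &DL, SDValue A, SDValue B) {
  return DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);
}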
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:479
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
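A sketch of how these value-tracking queries are typically combined before narrowing an operation; the 16-bit threshold and the function name are assumptions:
// Sketch: Op can be treated as a 16-bit value if its high bits are known zero
// or if it has at least that many sign bits. Assumes Op is wider than 16 bits.
static bool fitsInLow16(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned BitWidth = Known.getBitWidth();
  APInt HighBits = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
  return DAG.MaskedValueIsZero(Op, HighBits) ||
         DAG.ComputeNumSignBits(Op) > BitWidth - 16;
}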
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:560
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:908
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
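A short sketch of commuting a shuffle's mask (the function name is an assumption):
// Sketch: copy the mask of SV and rewrite it as if the two inputs were swapped.
static void getCommutedMask(const ShuffleVectorSDNode &SV,
                            SmallVectorImpl<int> &NewMask) {
  ArrayRef<int> Mask = SV.getMask();
  NewMask.assign(Mask.begin(), Mask.end());
  ShuffleVectorSDNode::commuteMask(NewMask);
}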
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
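These containers back the combiner-style worklist pattern; a minimal sketch (the traversal itself is an assumption, not this file's algorithm):
// Sketch: a deduplicating DFS worklist over SDNodes.
static void visitReachable(SDNode *Root) {
  SmallSetVector<SDNode *, 16> Worklist;
  Worklist.insert(Root);
  while (!Worklist.empty()) {
    SDNode *N = Worklist.pop_back_val();
    for (SDValue Op : N->op_values())
      Worklist.insert(Op.getNode()); // insert() silently skips duplicates
  }
}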
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
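A brief sketch of the common pattern of buffering operands in a SmallVector before rebuilding a node (the helper name is an assumption):
// Sketch: copy a node's operands into a stack-friendly buffer.
static SmallVector<SDValue, 8> collectOperands(SDNode *N) {
  SmallVector<SDValue, 8> Ops;
  Ops.reserve(N->getNumOperands());
  for (SDValue Op : N->op_values())
    Ops.push_back(Op);
  return Ops;
}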
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
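Combines typically guard on these legality queries differently before and after operation legalization; a minimal sketch (the opcode choice and flag name are assumptions):
// Sketch: decide whether forming FMINNUM is acceptable at the current stage.
static bool canFormFMinNum(const TargetLowering &TLI, EVT VT,
                           bool LegalOperations) {
  return LegalOperations ? TLI.isOperationLegal(ISD::FMINNUM, VT)
                         : TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT);
}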
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:229
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:251
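A small sketch of splitting a possibly scalable size with these TypeSize helpers (the assertion message is an assumption):
// Sketch: halve a memory access size, scalable or fixed.
static TypeSize halfOf(TypeSize Size) {
  assert(Size.isKnownMultipleOf(2) && "size is not evenly splittable");
  return Size.divideCoefficientBy(2);
}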
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2178
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2183
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2188
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2193
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:368
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:270
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:488
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:986
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:374
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:759
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:985
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:360
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1311
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1198
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1014
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:261
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1360
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:280
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:945
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1147
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1019
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:314
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1496
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1606
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
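A minimal sketch of this hook, which matches either a scalar constant or every element of a constant build vector (the threshold is an assumption):
// Sketch: accept shift amounts that are constants (or splats/build vectors
// of constants) all strictly less than 8.
static bool isSmallShiftAmount(SDValue Amt) {
  return ISD::matchUnaryPredicate(Amt, [](ConstantSDNode *C) {
    return C && C->getAPIntValue().ult(8);
  });
}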
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1581
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1601
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1422
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:933
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:836
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:553
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:854
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
Or< Preds... > m_AnyOf(Preds &&...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
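A sketch of the SDPatternMatch style used throughout this file, assuming the value-binding form of m_Value and the pattern-taking overload of m_OneUse:
// Sketch: match (srl (any_extend X), Y) where the srl result has one use,
// binding X and Y on success.
static bool matchSrlOfAnyExt(SDNode *N, const SelectionDAG &DAG,
                             SDValue &X, SDValue &Y) {
  using namespace llvm::SDPatternMatch;
  return sd_match(N, &DAG, m_OneUse(m_Srl(m_AnyExt(m_Value(X)), m_Value(Y))));
}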
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1527
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
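For illustration, a minimal, self-contained sketch of the range helpers listed in this index (zip, enumerate, all_of); the shuffle-mask data and helper names are hypothetical.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

// Hypothetical helper: do two masks agree wherever both are defined?
static bool masksAgree(ArrayRef<int> A, ArrayRef<int> B) {
  // zip stops at the end of the shorter range.
  for (auto [EltA, EltB] : zip(A, B))
    if (EltA != -1 && EltB != -1 && EltA != EltB)
      return false;
  // all_of folds a predicate over the whole range without begin()/end().
  return all_of(A, [](int M) { return M >= -1; });
}

// Hypothetical helper: index of the first undef (-1) mask element, or -1.
static int firstUndefElt(ArrayRef<int> Mask) {
  // enumerate pairs each element with its 0-based index.
  for (auto [Idx, Elt] : enumerate(Mask))
    if (Elt == -1)
      return static_cast<int>(Idx);
  return -1;
}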
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:895
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1509
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
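For illustration, a minimal sketch of the integer-log helpers above; shiftForNumElts is a hypothetical name.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Hypothetical helper: number of bits needed to index NumElts elements.
static unsigned shiftForNumElts(uint32_t NumElts) {
  // Log2_32 is exact for powers of two; Log2_32_Ceil rounds up so every
  // index still fits when NumElts is not a power of two.
  if (llvm::isPowerOf2_32(NumElts))
    return llvm::Log2_32(NumElts);
  return llvm::Log2_32_Ceil(NumElts);
}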
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1477
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
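For illustration, a minimal sketch of how the CombineLevel stages above are typically consulted; the helper name is hypothetical.

#include "llvm/CodeGen/DAGCombine.h"

using namespace llvm;

// Hypothetical helper: only pre-type-legalization combines may create
// integer types the target does not natively support; all later stages
// must keep the DAG type-legal.
static bool mayCreateIllegalTypes(CombineLevel Level) {
  return Level == BeforeLegalizeTypes;
}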
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with the widest possible elements.
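For illustration, a minimal sketch of the shuffle-mask utilities listed in this index; widenMaskIfPossible is a hypothetical helper and assumes widenShuffleMaskElts is available from llvm/Analysis/VectorUtils.h.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// Hypothetical helper: view a mask of 2*N narrow elements as a mask of N
// wide elements when the narrow elements pair up; otherwise keep it as-is.
static SmallVector<int, 16> widenMaskIfPossible(ArrayRef<int> Mask) {
  SmallVector<int, 16> Widened;
  // Succeeds only if every aligned pair of mask elements addresses one
  // wide element (undef, i.e. -1, is allowed in either slot).
  if (widenShuffleMaskElts(/*Scale=*/2, Mask, Widened))
    return Widened;
  return SmallVector<int, 16>(Mask.begin(), Mask.end());
}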
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
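For illustration, a minimal sketch of the alignment helpers above (commonAlignment, isAligned, Log2); the helper name is hypothetical.

#include "llvm/Support/Alignment.h"
#include <cstdint>

using namespace llvm;

// Hypothetical helper: is SizeInBytes a multiple of the alignment that is
// still guaranteed Offset bytes past a base with alignment Base?
static bool sizeAlignedAtOffset(Align Base, uint64_t Offset,
                                uint64_t SizeInBytes) {
  // commonAlignment(Align(16), 4) == Align(4); Log2(Align(4)) == 2.
  Align AtOffset = commonAlignment(Base, Offset);
  // isAligned(A, N) checks that N is a multiple of A's value.
  return isAligned(AtOffset, SizeInBytes);
}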
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
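For illustration, a minimal sketch combining the SelectionDAG constant predicates listed in this index (isAllOnesConstant, isConstOrConstSplat, peekThroughBitcasts); the helper name is hypothetical.

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Hypothetical helper: is Op an all-ones constant (scalar or splat),
// possibly hidden behind bitcasts, so that xor X, Op is a bitwise NOT?
static bool isAllOnesMask(SDValue Op) {
  if (isAllOnesConstant(Op))
    return true;
  // isConstOrConstSplat also accepts a splatted BUILD_VECTOR constant.
  if (ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(Op)))
    return C->isAllOnes();
  return false;
}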
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
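For illustration, a minimal sketch of the fltSemantics queries above; the helper name is hypothetical.

#include "llvm/ADT/APFloat.h"

using namespace llvm;

// Hypothetical helper: can every integer that needs ActiveBits bits be
// represented exactly in the given floating-point semantics?
static bool intFitsExactly(const fltSemantics &Sem, unsigned ActiveBits) {
  // The significand width (semanticsPrecision) bounds the largest range of
  // contiguous integers that round-trips, e.g. 24 bits for IEEE single.
  return ActiveBits <= APFloatBase::semanticsPrecision(Sem);
}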
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the vector type has a power-of-two number of elements.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
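For illustration, a minimal sketch of the EVT queries and factory functions above; integerTypeCoveringVector is a hypothetical helper.

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

using namespace llvm;

// Hypothetical helper: the scalar integer EVT with the same total width as
// a fixed-length vector, e.g. v4i32 -> i128.
static EVT integerTypeCoveringVector(LLVMContext &Ctx, EVT VecVT) {
  assert(VecVT.isFixedLengthVector() && "expected a fixed-length vector");
  unsigned Bits = (unsigned)VecVT.getFixedSizeInBits();
  EVT IntVT = EVT::getIntegerVT(Ctx, Bits);
  assert(IntVT.bitsEq(VecVT) && "same bit width by construction");
  return IntVT;
}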
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
Definition: KnownBits.h:292
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
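For illustration, a minimal sketch of the KnownBits queries above; fitsInBits is a hypothetical helper.

#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Hypothetical helper: is every value consistent with Known representable
// as an unsigned integer of DstBits bits?
static bool fitsInBits(const KnownBits &Known, unsigned DstBits) {
  // If all bits are known, the value itself can be inspected; otherwise
  // countMaxActiveBits() bounds the number of significant bits.
  if (Known.isConstant())
    return Known.getConstant().getActiveBits() <= DstBits;
  return Known.countMaxActiveBits() <= DstBits;
}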
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
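For illustration, a minimal sketch of the SDNodeFlags accessors above; the helper name is hypothetical.

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Hypothetical helper: does N carry a wrap flag a combine must either
// preserve or explicitly clear (e.g. via setNoUnsignedWrap(false)) when
// rewriting the node?
static bool hasWrapFlags(const SDNode *N) {
  SDNodeFlags Flags = N->getFlags();
  // 'disjoint' (on OR) and 'nneg' (on ZERO_EXTEND) are queried the same
  // way via hasDisjoint() and hasNonNeg().
  return Flags.hasNoUnsignedWrap() || Flags.hasNoSignedWrap();
}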
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:307
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
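For illustration, a minimal sketch of the DAGUpdateListener interface above: a listener that records every node deleted while it is installed. The struct name is hypothetical.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

struct DeletedNodeTracker : SelectionDAG::DAGUpdateListener {
  SmallPtrSet<SDNode *, 16> Deleted;

  explicit DeletedNodeTracker(SelectionDAG &DAG) : DAGUpdateListener(DAG) {}

  // E, if non-null, is an equivalent node that replaced N.
  void NodeDeleted(SDNode *N, SDNode * /*E*/) override { Deleted.insert(N); }
  void NodeInserted(SDNode * /*N*/) override {}
};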
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients.