82#include "llvm/IR/IntrinsicsAArch64.h"
92#define DEBUG_TYPE "aarch64-sve-shuffle-opts"
104 assert(
I->getIntrinsicID() == Intrinsic::vector_deinterleave4 &&
105 "Only deinterleave4 supported currently");
120 std::array<CastInst *, 4> Extends = {};
122 Type *DestTy =
nullptr;
123 for (
User *U :
I->users()) {
125 if (!Extract || !Extract->hasOneUse())
135 if (!L.contains(Extend))
138 Opcode = Extend->getOpcode();
139 DestTy = Extend->getDestTy();
145 Extends[Extract->getIndices().front()] = Extend;
155 Candidates.try_emplace(
I, Extends);
164 unsigned DstBits = DestTy->getScalarSizeInBits();
165 unsigned SrcBits = SrcTy->getScalarSizeInBits();
172 for (
auto [Idx, Extend] :
enumerate(Extends)) {
189 Value *StepVector = Builder.CreateStepVector(StepVecTy);
191 Builder.CreateNUWMul(StepVector, ConstantInt::get(StepVecTy, 4));
192 Value *ZextTbl = Builder.CreateNUWAdd(
193 ScaledSteps, ConstantInt::get(StepVecTy, StartIdx));
194 Value *FinalMask = Builder.CreateBitCast(ZextTbl, InputTy);
198 Value *Tbl = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_tbl,
199 {InputTy}, {
Input, FinalMask});
200 Value *
Widen = Builder.CreateBitCast(Tbl, StepVecTy);
203 LLVM_DEBUG(
dbgs() <<
"SVETBLOPT: Replaced " << *Extend <<
" with "
205 Extend->replaceAllUsesWith(
Widen);
206 Extend->eraseFromParent();
219 if (!L.isInnermost() || !ST.isSVEorStreamingSVEAvailable())
228 "Shuffle optimizations unsupported for big endian targets.");
230 for (
auto *BB : L.blocks())
235 if (Candidates.empty())
243struct SVEShuffleOpts :
public LoopPass {
245 SVEShuffleOpts() : LoopPass(ID) {}
247 bool runOnLoop(Loop *L, LPPassManager &PM)
override {
251 TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
252 const AArch64TargetMachine &TM = TPC.
getTM<AArch64TargetMachine>();
253 const AArch64Subtarget &
ST =
259 void getAnalysisUsage(AnalysisUsage &AU)
const override {
264 StringRef getPassName()
const override {
return "SVE Shuffle Optimizations"; }
268char SVEShuffleOpts::ID = 0;
269static const char *
name =
"SVE Shuffle Optimizations";
280 *TM.getSubtargetImpl(*L.getHeader()->getParent());
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This header defines various interfaces for pass management in LLVM.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
SmallDenseMap< CallInst *, std::array< CastInst *, 4 > > DeinterleaveMap
A mapping between a vector_deinterleaveN intrinsic and extending cast instructions used on the result...
static bool processLoop(Loop &L, const AArch64Subtarget &ST, DataLayout DL)
static void optimizeSVEDeinterleavedExtends(DeinterleaveMap Deinterleaves)
Given a map of deinterleaves to zext or uitofp casts, remove the operations and replace them with tbl...
static void evaluateDeinterleave(IntrinsicInst *I, DeinterleaveMap &Candidates, Loop &L, const AArch64TargetLowering &TL, const DataLayout DL)
Evaluate a deinterleave and see what the uses are.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
const AArch64Subtarget * getSubtargetImpl(const Function &F) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
A function analysis which provides an AssumptionCache.
Represents analyses that only rely on functions' control flow.
This is the base class for all instructions that perform data casts.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Type * getDestTy() const
Return the destination type, as a convenience.
This class represents a range of values.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
A parsed version of the target data layout string, and methods for querying it.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Represents a single loop in the control flow graph.
An analysis that produces MemorySSA for a function.
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
LLVM_ABI PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Analysis pass providing the TargetTransformInfo.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
const DataLayout createDataLayout() const
Create a DataLayout.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager
The loop analysis manager.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Pass * createSVEShuffleOptsPass()
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...