35#define DEBUG_TYPE "hexagon-shuffle"
46 enum { MAX = 360360 };
50 HexagonBid() =
default;
54 bool isSold()
const {
return (Bid >= MAX); }
63class HexagonUnitAuction {
69 HexagonUnitAuction(
unsigned cs = 0) : isSold(cs) {}
72 bool bid(
unsigned B) {
74 unsigned b =
B & ~isSold;
79 Scores[i] += HexagonBid(b);
80 isSold |= Scores[i].isSold() << i;
92 const unsigned SlotWeight = 8;
93 const unsigned MaskWeight = SlotWeight - 1;
95 unsigned Key = ((1u << s) & Units) != 0;
100 if (Key == 0 || Units == 0 || (SlotWeight * s >= 32))
105 Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
119 if (Units == 0 && Lanes == 0) {
144 for (
unsigned i = 1; i < Lanes; ++i)
145 startBit = (startBit << 1) | startBit;
150 unsigned usedUnits) {
151 if (startIdx < hvxInsts.
size()) {
152 if (!hvxInsts[startIdx].Units)
154 for (
unsigned b = 0x1; b <= 0x8; b <<= 1) {
155 if ((hvxInsts[startIdx].Units & b) == 0)
157 unsigned allBits =
makeAllBits(b, hvxInsts[startIdx].Lanes);
158 if ((allBits & usedUnits) == 0) {
159 if (
checkHVXPipes(hvxInsts, startIdx + 1, usedUnits | allBits))
171 : Context(Context), BundleFlags(), MCII(MCII), STI(STI),
172 ReportErrors(ReportErrors), CheckFailure() {
197 if (Summary.Slot1AOKLoc)
199 MCInst const &Inst = ISJ.getDesc();
204 const unsigned Units = ISJ.Core.getUnits();
209 "Instruction was restricted from being in slot 1"));
211 *Summary.Slot1AOKLoc,
"Instruction can only be combined "
212 "with an ALU instruction in slot 1"));
220 HexagonPacketSummary
const &Summary) {
225 if (!Summary.NoSlot1StoreLoc)
228 bool AppliedRestriction =
false;
231 MCInst const &Inst = ISJ.getDesc();
233 unsigned Units = ISJ.Core.getUnits();
235 AppliedRestriction =
true;
237 Inst.
getLoc(),
"Instruction was restricted from being in slot 1"));
243 if (AppliedRestriction)
245 std::make_pair(*Summary.NoSlot1StoreLoc,
246 "Instruction does not allow a store in slot 1"));
250 const bool DoShuffle) {
273 const bool HasMultipleBranches = Summary.branchInsts.size() > 1;
274 if (!HasMultipleBranches)
277 if (Summary.branchInsts.size() > 2) {
282 const static std::pair<unsigned, unsigned> jumpSlots[] = {
283 {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
285 for (std::pair<unsigned, unsigned> jumpSlot : jumpSlots) {
287 if (!(jumpSlot.first & Summary.branchInsts[0]->Core.getUnits()))
291 if (!(jumpSlot.second & Summary.branchInsts[1]->Core.getUnits()))
296 Summary.branchInsts[0]->Core.setUnits(jumpSlot.first);
297 Summary.branchInsts[1]->Core.setUnits(jumpSlot.second);
299 const bool HasShuffledPacket =
tryAuction(Summary).has_value();
300 if (HasShuffledPacket)
315 ISJ.Core.setAllUnits();
320 std::optional<HexagonPacket> ShuffledPacket =
tryAuction(Summary);
322 if (!ShuffledPacket) {
332 for (
const auto &
I : *ShuffledPacket) {
334 inst.
Units =
I.CVI.getUnits();
335 inst.
Lanes =
I.CVI.getLanes();
342 if (hvxInsts.
size() > 0) {
343 unsigned startIdx, usedUnits;
344 startIdx = usedUnits = 0x0;
352 Packet = *ShuffledPacket;
358 HexagonPacketSummary
const &Summary) {
361 static const unsigned slotFirstLoadStore =
Slot1Mask;
362 static const unsigned slotLastLoadStore =
Slot0Mask;
363 unsigned slotLoadStore = slotFirstLoadStore;
368 if (!ISJ->Core.getUnits())
374 if (Summary.loads == 1 && Summary.loads == Summary.memory &&
377 switch (
ID.getOpcode()) {
378 case Hexagon::V6_vgathermw:
379 case Hexagon::V6_vgathermh:
380 case Hexagon::V6_vgathermhw:
381 case Hexagon::V6_vgathermwq:
382 case Hexagon::V6_vgathermhq:
383 case Hexagon::V6_vgathermhwq:
393 if (slotLoadStore < slotLastLoadStore) {
395 reportError(
"invalid instruction packet: too many loads");
399 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
407 if (!Summary.store0) {
408 const bool PacketHasNoOnlySlot0 =
411 I.ID->getOpcode() !=
ID.getOpcode();
413 const bool SafeToMoveToSlot0 =
414 (Summary.loads == 0) ||
417 if (Summary.stores == 1 && SafeToMoveToSlot0)
420 else if (Summary.stores >= 1) {
421 if (slotLoadStore < slotLastLoadStore) {
423 reportError(
"invalid instruction packet: too many stores");
427 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
432 if (Summary.store1 && Summary.stores > 1) {
434 reportError(
"invalid instruction packet: too many stores");
446 if ((SlotMask & (1 << SlotNum)) != 0)
449 return llvm::join(Slots,
StringRef(
", "));
453 HexagonPacketSummary Summary = HexagonPacketSummary();
460 Summary.Slot1AOKLoc =
ID.getLoc();
462 Summary.NoSlot1StoreLoc =
ID.getLoc();
466 Summary.PrefSlot3Inst = ISJ;
468 const unsigned ReservedSlots =
470 Summary.ReservedSlotMask |= ReservedSlots;
471 if (ReservedSlots != 0)
473 (
Twine(
"Instruction has reserved slots: ") +
483 Summary.branchInsts.push_back(ISJ);
491 ++Summary.NonZCVIloads;
494 ++Summary.AllCVIloads;
503 Summary.branchInsts.push_back(ISJ);
531 Summary.branchInsts.push_back(ISJ);
551 Summary.branchInsts.push_back(ISJ);
555 MCInst const &Inst0 = *
ID.getOperand(0).getInst();
556 MCInst const &Inst1 = *
ID.getOperand(1).getInst();
558 Summary.branchInsts.push_back(ISJ);
560 Summary.branchInsts.push_back(ISJ);
562 Summary.branchInsts.push_back(ISJ);
564 Summary.branchInsts.push_back(ISJ);
573 HexagonPacketSummary
const &Summary)
const {
575 const unsigned ZCVIloads = Summary.AllCVIloads - Summary.NonZCVIloads;
576 const bool ValidHVXMem =
577 Summary.NonZCVIloads <= 1 && ZCVIloads <= 1 && Summary.CVIstores <= 1;
578 const bool InvalidPacket =
579 ((Summary.load0 > 1 || Summary.store0 > 1 || !ValidHVXMem) ||
580 (Summary.duplex > 1 || (Summary.duplex && Summary.memory)));
582 return !InvalidPacket;
586 const bool DoShuffle) {
591 const bool NeedsPrefSlot3Shuffle = Summary.branchInsts.size() <= 1 &&
592 !HasOnlySlot3 && Summary.pSlot3Cnt == 1 &&
593 Summary.PrefSlot3Inst && DoShuffle;
595 if (!NeedsPrefSlot3Shuffle)
601 const unsigned saveUnits = PrefSlot3Inst->Core.
getUnits();
603 const bool HasShuffledPacket =
tryAuction(Summary).has_value();
604 if (HasShuffledPacket)
607 PrefSlot3Inst->Core.
setUnits(saveUnits);
627std::optional<HexagonShuffler::HexagonPacket>
630 HexagonUnitAuction AuctionCore(Summary.ReservedSlotMask);
633 const bool ValidSlots =
635 return AuctionCore.bid(
I.Core.getUnits());
639 dbgs() <<
"Shuffle attempt: " << (ValidSlots ?
"passed" :
"failed")
646 std::optional<HexagonPacket> Res;
663 if (
size() > 1 && Ok)
668 unsigned slotSkip, slotWeight;
671 for (ISJ = ISK = Packet.
begin(), slotSkip = slotWeight = 0;
672 ISK != Packet.
end(); ++ISK, ++slotSkip)
673 if (slotSkip < nSlot - emptySlots)
683 std::stable_sort(ISJ, Packet.
end());
692 if (ISJ.CVI.isValid()) {
695 dbgs() << ISJ.CVI.getLanes();
717 const unsigned Units =
I.Core.getUnits();
720 const std::string UnitsText = Units ?
SlotMaskToText(Units) :
"<None>";
722 Twine(
"Instruction can utilize slots: ") +
727 "Instruction does not require a slot");
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define HEXAGON_PACKET_SIZE
static const unsigned Slot1Mask
static const unsigned Slot0Mask
static std::string SlotMaskToText(unsigned SlotMask)
static const unsigned slotSingleLoad
static const unsigned slotSingleStore
static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx, unsigned usedUnits)
static const unsigned Slot3Mask
static unsigned makeAllBits(unsigned startBit, unsigned Lanes)
static bool isBranch(unsigned Opcode)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
HexagonCVIResource(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, unsigned s, MCInst const *id)
static bool lessCore(const HexagonInstr &A, const HexagonInstr &B)
static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B)
unsigned setWeight(unsigned s)
unsigned getUnits() const
void setUnits(unsigned s)
HexagonShuffler(MCContext &Context, bool ReportErrors, MCInstrInfo const &MCII, MCSubtargetInfo const &STI)
bool isMemReorderDisabled() const
void restrictNoSlot1Store(HexagonPacketSummary const &Summary)
bool check(const bool RequireShuffle=true)
Check that the packet is legal and enforce relative insn order.
void restrictSlot1AOK(HexagonPacketSummary const &Summary)
bool restrictStoreLoadOrder(HexagonPacketSummary const &Summary)
void reportError(Twine const &Msg)
void reportResourceError(HexagonPacketSummary const &Summary, StringRef Err)
HexagonPacketSummary GetPacketSummary()
bool ValidResourceUsage(HexagonPacketSummary const &Summary)
MCSubtargetInfo const & STI
void restrictBranchOrder(HexagonPacketSummary const &Summary)
void append(MCInst const &ID, MCInst const *Extender, unsigned S)
bool applySlotRestrictions(HexagonPacketSummary const &Summary, const bool DoShuffle)
void reportResourceUsage(HexagonPacketSummary const &Summary)
bool ValidPacketMemoryOps(HexagonPacketSummary const &Summary) const
void restrictPreferSlot3(HexagonPacketSummary const &Summary, const bool DoShuffle)
std::vector< std::pair< SMLoc, std::string > > AppliedRestrictions
std::optional< HexagonPacket > tryAuction(HexagonPacketSummary const &Summary)
Context object for machine code objects.
const SourceMgr * getSourceManager() const
void reportError(SMLoc L, const Twine &Msg)
Instances of this class represent a single low-level machine instruction.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Generic base class for all target subtargets.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
raw_ostream & write_hex(unsigned long long N)
Output N in hexadecimal, without any prefix or padding.
@ TypeCVI_SCATTER_NEW_RST
bool isRestrictSlot1AOK(MCInstrInfo const &MCII, MCInst const &MCI)
Return whether the insn can be packaged only with an A-type insn in slot #1.
bool IsABranchingInst(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &I)
unsigned getCVIResources(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &MCI)
Return the resources used by this instruction.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI)
unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI)
Return the Hexagon ISA class for the insn.
bool isImmext(MCInst const &MCI)
unsigned getOtherReservedSlots(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &MCI)
Return the slots this instruction consumes in addition to the slot(s) it can execute out of.
bool requiresSlot(MCSubtargetInfo const &STI, MCInst const &MCI)
StringRef getName(MCInstrInfo const &MCII, MCInst const &MCI)
bool isRestrictNoSlot1Store(MCInstrInfo const &MCII, MCInst const &MCI)
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes)
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.