34#define DEBUG_TYPE "hexagon-shuffle"
45 enum { MAX = 360360 };
49 HexagonBid() =
default;
53 bool isSold()
const {
return (Bid >= MAX); }
62class HexagonUnitAuction {
68 HexagonUnitAuction(
unsigned cs = 0) : isSold(cs) {}
71 bool bid(
unsigned B) {
73 unsigned b =
B & ~isSold;
78 Scores[i] += HexagonBid(b);
79 isSold |= Scores[i].isSold() << i;
91 const unsigned SlotWeight = 8;
92 const unsigned MaskWeight = SlotWeight - 1;
94 unsigned Key = ((1u << s) & Units) != 0;
99 if (Key == 0 || Units == 0 || (SlotWeight * s >= 32))
104 Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
118 if (Units == 0 && Lanes == 0) {
143 for (
unsigned i = 1; i < Lanes; ++i)
144 startBit = (startBit << 1) | startBit;
149 unsigned usedUnits) {
150 if (startIdx < hvxInsts.
size()) {
151 if (!hvxInsts[startIdx].Units)
153 for (
unsigned b = 0x1; b <= 0x8; b <<= 1) {
154 if ((hvxInsts[startIdx].Units & b) == 0)
156 unsigned allBits =
makeAllBits(b, hvxInsts[startIdx].Lanes);
157 if ((allBits & usedUnits) == 0) {
158 if (
checkHVXPipes(hvxInsts, startIdx + 1, usedUnits | allBits))
170 : Context(Context), BundleFlags(), MCII(MCII), STI(STI),
171 ReportErrors(ReportErrors), CheckFailure() {
196 if (Summary.Slot1AOKLoc)
198 MCInst const &Inst = ISJ.getDesc();
203 const unsigned Units = ISJ.Core.getUnits();
208 "Instruction was restricted from being in slot 1"));
210 *Summary.Slot1AOKLoc,
"Instruction can only be combined "
211 "with an ALU instruction in slot 1"));
219 HexagonPacketSummary
const &Summary) {
224 if (!Summary.NoSlot1StoreLoc)
227 bool AppliedRestriction =
false;
230 MCInst const &Inst = ISJ.getDesc();
232 unsigned Units = ISJ.Core.getUnits();
234 AppliedRestriction =
true;
236 Inst.
getLoc(),
"Instruction was restricted from being in slot 1"));
242 if (AppliedRestriction)
244 std::make_pair(*Summary.NoSlot1StoreLoc,
245 "Instruction does not allow a store in slot 1"));
249 const bool DoShuffle) {
272 const bool HasMultipleBranches = Summary.branchInsts.size() > 1;
273 if (!HasMultipleBranches)
276 if (Summary.branchInsts.size() > 2) {
281 const static std::pair<unsigned, unsigned> jumpSlots[] = {
282 {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
284 for (std::pair<unsigned, unsigned> jumpSlot : jumpSlots) {
286 if (!(jumpSlot.first & Summary.branchInsts[0]->Core.getUnits()))
290 if (!(jumpSlot.second & Summary.branchInsts[1]->Core.getUnits()))
295 Summary.branchInsts[0]->Core.setUnits(jumpSlot.first);
296 Summary.branchInsts[1]->Core.setUnits(jumpSlot.second);
298 const bool HasShuffledPacket =
tryAuction(Summary).has_value();
299 if (HasShuffledPacket)
314 ISJ.Core.setAllUnits();
319 std::optional<HexagonPacket> ShuffledPacket =
tryAuction(Summary);
321 if (!ShuffledPacket) {
331 for (
const auto &
I : *ShuffledPacket) {
333 inst.
Units =
I.CVI.getUnits();
334 inst.
Lanes =
I.CVI.getLanes();
341 if (hvxInsts.
size() > 0) {
342 unsigned startIdx, usedUnits;
343 startIdx = usedUnits = 0x0;
351 Packet = *ShuffledPacket;
357 HexagonPacketSummary
const &Summary) {
360 static const unsigned slotFirstLoadStore =
Slot1Mask;
361 static const unsigned slotLastLoadStore =
Slot0Mask;
362 unsigned slotLoadStore = slotFirstLoadStore;
367 if (!ISJ->Core.getUnits())
373 if (Summary.loads == 1 && Summary.loads == Summary.memory &&
376 switch (
ID.getOpcode()) {
377 case Hexagon::V6_vgathermw:
378 case Hexagon::V6_vgathermh:
379 case Hexagon::V6_vgathermhw:
380 case Hexagon::V6_vgathermwq:
381 case Hexagon::V6_vgathermhq:
382 case Hexagon::V6_vgathermhwq:
392 if (slotLoadStore < slotLastLoadStore) {
394 reportError(
"invalid instruction packet: too many loads");
398 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
406 if (!Summary.store0) {
407 const bool PacketHasNoOnlySlot0 =
410 I.ID->getOpcode() !=
ID.getOpcode();
412 const bool SafeToMoveToSlot0 =
413 (Summary.loads == 0) ||
416 if (Summary.stores == 1 && SafeToMoveToSlot0)
419 else if (Summary.stores >= 1) {
420 if (slotLoadStore < slotLastLoadStore) {
422 reportError(
"invalid instruction packet: too many stores");
426 ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore);
431 if (Summary.store1 && Summary.stores > 1) {
433 reportError(
"invalid instruction packet: too many stores");
445 if ((SlotMask & (1 << SlotNum)) != 0)
448 return llvm::join(Slots,
StringRef(
", "));
452 HexagonPacketSummary Summary = HexagonPacketSummary();
459 Summary.Slot1AOKLoc =
ID.getLoc();
461 Summary.NoSlot1StoreLoc =
ID.getLoc();
465 Summary.PrefSlot3Inst = ISJ;
467 const unsigned ReservedSlots =
469 Summary.ReservedSlotMask |= ReservedSlots;
470 if (ReservedSlots != 0)
472 (
Twine(
"Instruction has reserved slots: ") +
482 Summary.branchInsts.push_back(ISJ);
490 ++Summary.NonZCVIloads;
493 ++Summary.AllCVIloads;
502 Summary.branchInsts.push_back(ISJ);
530 Summary.branchInsts.push_back(ISJ);
550 Summary.branchInsts.push_back(ISJ);
554 MCInst const &Inst0 = *
ID.getOperand(0).getInst();
555 MCInst const &Inst1 = *
ID.getOperand(1).getInst();
557 Summary.branchInsts.push_back(ISJ);
559 Summary.branchInsts.push_back(ISJ);
561 Summary.branchInsts.push_back(ISJ);
563 Summary.branchInsts.push_back(ISJ);
572 HexagonPacketSummary
const &Summary)
const {
574 const unsigned ZCVIloads = Summary.AllCVIloads - Summary.NonZCVIloads;
575 const bool ValidHVXMem =
576 Summary.NonZCVIloads <= 1 && ZCVIloads <= 1 && Summary.CVIstores <= 1;
577 const bool InvalidPacket =
578 ((Summary.load0 > 1 || Summary.store0 > 1 || !ValidHVXMem) ||
579 (Summary.duplex > 1 || (Summary.duplex && Summary.memory)));
581 return !InvalidPacket;
585 const bool DoShuffle) {
590 const bool NeedsPrefSlot3Shuffle = Summary.branchInsts.size() <= 1 &&
591 !HasOnlySlot3 && Summary.pSlot3Cnt == 1 &&
592 Summary.PrefSlot3Inst && DoShuffle;
594 if (!NeedsPrefSlot3Shuffle)
600 const unsigned saveUnits = PrefSlot3Inst->Core.
getUnits();
602 const bool HasShuffledPacket =
tryAuction(Summary).has_value();
603 if (HasShuffledPacket)
606 PrefSlot3Inst->Core.
setUnits(saveUnits);
626std::optional<HexagonShuffler::HexagonPacket>
629 HexagonUnitAuction AuctionCore(Summary.ReservedSlotMask);
632 const bool ValidSlots =
634 return AuctionCore.bid(
I.Core.getUnits());
638 dbgs() <<
"Shuffle attempt: " << (ValidSlots ?
"passed" :
"failed")
645 std::optional<HexagonPacket> Res;
662 if (
size() > 1 && Ok)
667 unsigned slotSkip, slotWeight;
670 for (ISJ = ISK = Packet.
begin(), slotSkip = slotWeight = 0;
671 ISK != Packet.
end(); ++ISK, ++slotSkip)
672 if (slotSkip < nSlot - emptySlots)
682 std::stable_sort(ISJ, Packet.
end());
691 if (ISJ.CVI.isValid()) {
694 dbgs() << ISJ.CVI.getLanes();
716 const unsigned Units =
I.Core.getUnits();
719 const std::string UnitsText = Units ?
SlotMaskToText(Units) :
"<None>";
721 Twine(
"Instruction can utilize slots: ") +
726 "Instruction does not require a slot");
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define HEXAGON_PACKET_SIZE
static const unsigned Slot1Mask
static const unsigned Slot0Mask
static std::string SlotMaskToText(unsigned SlotMask)
static const unsigned slotSingleLoad
static const unsigned slotSingleStore
static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx, unsigned usedUnits)
static const unsigned Slot3Mask
static unsigned makeAllBits(unsigned startBit, unsigned Lanes)
static bool isBranch(unsigned Opcode)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
HexagonCVIResource(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, unsigned s, MCInst const *id)
static bool lessCore(const HexagonInstr &A, const HexagonInstr &B)
static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B)
unsigned setWeight(unsigned s)
unsigned getUnits() const
void setUnits(unsigned s)
HexagonShuffler(MCContext &Context, bool ReportErrors, MCInstrInfo const &MCII, MCSubtargetInfo const &STI)
bool isMemReorderDisabled() const
void restrictNoSlot1Store(HexagonPacketSummary const &Summary)
bool check(const bool RequireShuffle=true)
Check that the packet is legal and enforce relative insn order.
void restrictSlot1AOK(HexagonPacketSummary const &Summary)
bool restrictStoreLoadOrder(HexagonPacketSummary const &Summary)
void reportError(Twine const &Msg)
void reportResourceError(HexagonPacketSummary const &Summary, StringRef Err)
HexagonPacketSummary GetPacketSummary()
bool ValidResourceUsage(HexagonPacketSummary const &Summary)
MCSubtargetInfo const & STI
void restrictBranchOrder(HexagonPacketSummary const &Summary)
void append(MCInst const &ID, MCInst const *Extender, unsigned S)
bool applySlotRestrictions(HexagonPacketSummary const &Summary, const bool DoShuffle)
void reportResourceUsage(HexagonPacketSummary const &Summary)
bool ValidPacketMemoryOps(HexagonPacketSummary const &Summary) const
void restrictPreferSlot3(HexagonPacketSummary const &Summary, const bool DoShuffle)
std::vector< std::pair< SMLoc, std::string > > AppliedRestrictions
std::optional< HexagonPacket > tryAuction(HexagonPacketSummary const &Summary)
Context object for machine code objects.
const SourceMgr * getSourceManager() const
void reportError(SMLoc L, const Twine &Msg)
Instances of this class represent a single low-level machine instruction.
bool mayLoad() const
Return true if this instruction could possibly read memory.
unsigned getOpcode() const
Return the opcode number for this descriptor.
Interface to description of machine instruction set.
Generic base class for all target subtargets.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
raw_ostream & write_hex(unsigned long long N)
Output N in hexadecimal, without any prefix or padding.
@ TypeCVI_SCATTER_NEW_RST
bool isRestrictSlot1AOK(MCInstrInfo const &MCII, MCInst const &MCI)
Return whether the insn can be packaged only with an A-type insn in slot #1.
bool IsABranchingInst(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &I)
unsigned getCVIResources(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &MCI)
Return the resources used by this instruction.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI)
unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI)
Return the Hexagon ISA class for the insn.
bool isImmext(MCInst const &MCI)
unsigned getOtherReservedSlots(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst const &MCI)
Return the slots this instruction consumes in addition to the slot(s) it can execute out of.
bool requiresSlot(MCSubtargetInfo const &STI, MCInst const &MCI)
StringRef getName(MCInstrInfo const &MCII, MCInst const &MCI)
bool isRestrictNoSlot1Store(MCInstrInfo const &MCII, MCInst const &MCI)
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned HexagonConvertUnits(unsigned ItinUnits, unsigned *Lanes)
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.