#include "X86GenFoldTables.inc"
// Table mapping the register form of EVEX bitwise instructions to a
// broadcast-memory form of the other element width. Because these are
// bitwise operations, a packed-double op can absorb a 32-bit broadcast by
// switching to the packed-single opcode (and vice versa), and the
// VPAND/VPANDN/VPOR/VPXOR forms can swap their D/Q element widths the same way.
static const X86FoldTableEntry BroadcastSizeTable2[] = {
  { X86::VANDNPDZ128rr,   X86::VANDNPSZ128rmb,   TB_BCAST_SS },
  { X86::VANDNPDZ256rr,   X86::VANDNPSZ256rmb,   TB_BCAST_SS },
  { X86::VANDNPSZ128rr,   X86::VANDNPDZ128rmb,   TB_BCAST_SD },
  { X86::VANDNPSZ256rr,   X86::VANDNPDZ256rmb,   TB_BCAST_SD },
  { X86::VANDPDZ128rr,    X86::VANDPSZ128rmb,    TB_BCAST_SS },
  { X86::VANDPDZ256rr,    X86::VANDPSZ256rmb,    TB_BCAST_SS },
  { X86::VANDPSZ128rr,    X86::VANDPDZ128rmb,    TB_BCAST_SD },
  { X86::VANDPSZ256rr,    X86::VANDPDZ256rmb,    TB_BCAST_SD },
  { X86::VORPDZ128rr,     X86::VORPSZ128rmb,     TB_BCAST_SS },
  { X86::VORPDZ256rr,     X86::VORPSZ256rmb,     TB_BCAST_SS },
  { X86::VORPSZ128rr,     X86::VORPDZ128rmb,     TB_BCAST_SD },
  { X86::VORPSZ256rr,     X86::VORPDZ256rmb,     TB_BCAST_SD },
  { X86::VPANDDZ128rr,    X86::VPANDQZ128rmb,    TB_BCAST_Q },
  { X86::VPANDDZ256rr,    X86::VPANDQZ256rmb,    TB_BCAST_Q },
  { X86::VPANDDZrr,       X86::VPANDQZrmb,       TB_BCAST_Q },
  { X86::VPANDNDZ128rr,   X86::VPANDNQZ128rmb,   TB_BCAST_Q },
  { X86::VPANDNDZ256rr,   X86::VPANDNQZ256rmb,   TB_BCAST_Q },
  { X86::VPANDNDZrr,      X86::VPANDNQZrmb,      TB_BCAST_Q },
  { X86::VPANDNQZ128rr,   X86::VPANDNDZ128rmb,   TB_BCAST_D },
  { X86::VPANDNQZ256rr,   X86::VPANDNDZ256rmb,   TB_BCAST_D },
  { X86::VPANDNQZrr,      X86::VPANDNDZrmb,      TB_BCAST_D },
  { X86::VPANDQZ128rr,    X86::VPANDDZ128rmb,    TB_BCAST_D },
  { X86::VPANDQZ256rr,    X86::VPANDDZ256rmb,    TB_BCAST_D },
  { X86::VPANDQZrr,       X86::VPANDDZrmb,       TB_BCAST_D },
  { X86::VPORDZ128rr,     X86::VPORQZ128rmb,     TB_BCAST_Q },
  { X86::VPORDZ256rr,     X86::VPORQZ256rmb,     TB_BCAST_Q },
  { X86::VPORQZ128rr,     X86::VPORDZ128rmb,     TB_BCAST_D },
  { X86::VPORQZ256rr,     X86::VPORDZ256rmb,     TB_BCAST_D },
  { X86::VPXORDZ128rr,    X86::VPXORQZ128rmb,    TB_BCAST_Q },
  { X86::VPXORDZ256rr,    X86::VPXORQZ256rmb,    TB_BCAST_Q },
  { X86::VPXORDZrr,       X86::VPXORQZrmb,       TB_BCAST_Q },
  { X86::VPXORQZ128rr,    X86::VPXORDZ128rmb,    TB_BCAST_D },
  { X86::VPXORQZ256rr,    X86::VPXORDZ256rmb,    TB_BCAST_D },
  { X86::VPXORQZrr,       X86::VPXORDZrmb,       TB_BCAST_D },
  { X86::VXORPDZ128rr,    X86::VXORPSZ128rmb,    TB_BCAST_SS },
  { X86::VXORPDZ256rr,    X86::VXORPSZ256rmb,    TB_BCAST_SS },
  { X86::VXORPSZ128rr,    X86::VXORPDZ128rmb,    TB_BCAST_SD },
  { X86::VXORPSZ256rr,    X86::VXORPDZ256rmb,    TB_BCAST_SD },
};
// Same idea for the three-operand VPTERNLOG forms: the D and Q variants are
// interchangeable, only the broadcast width differs.
static const X86FoldTableEntry BroadcastSizeTable3[] = {
  { X86::VPTERNLOGDZ128rri,   X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q },
  { X86::VPTERNLOGDZ256rri,   X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q },
  { X86::VPTERNLOGDZrri,      X86::VPTERNLOGQZrmbi,      TB_BCAST_Q },
  { X86::VPTERNLOGQZ128rri,   X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D },
  { X86::VPTERNLOGQZ256rri,   X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D },
  { X86::VPTERNLOGQZrri,      X86::VPTERNLOGDZrmbi,      TB_BCAST_D },
};
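// The two tables above pair a register-form opcode (the lookup key) with a
// broadcast-memory-form opcode of a different element width; the TB_BCAST_*
// flag records which broadcast width is actually used.
//
// Minimal self-contained sketch of this table pattern and its binary-search
// lookup. FoldEntry and lookupEntry are simplified stand-ins invented here
// for illustration, not the real X86FoldTableEntry or lookupFoldTableImpl:
#include <algorithm>
#include <cstdint>

struct FoldEntry {
  unsigned KeyOp;  // register-form opcode; the tables are kept sorted by this
  unsigned DstOp;  // memory/broadcast-form opcode to rewrite to
  uint16_t Flags;  // extra information, e.g. the broadcast element width
  bool operator<(const FoldEntry &RHS) const { return KeyOp < RHS.KeyOp; }
};

const FoldEntry *lookupEntry(const FoldEntry *Begin, const FoldEntry *End,
                             unsigned RegOp) {
  // lower_bound needs the table sorted by KeyOp, hence the assertions below.
  const FoldEntry *I = std::lower_bound(Begin, End, FoldEntry{RegOp, 0, 0});
  return (I != End && I->KeyOp == RegOp) ? I : nullptr;
}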
#define CHECK_SORTED_UNIQUE(TABLE)                                             \
  assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \
  assert(std::adjacent_find(std::begin(TABLE), std::end(TABLE)) ==             \
             std::end(TABLE) &&                                                \
         #TABLE " is not unique");
static const X86FoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
  // Verify the tables are sorted and unique, but only once.
  static std::atomic<bool> FoldTablesChecked(false);
  if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
    // ... CHECK_SORTED_UNIQUE is invoked here for each of the fold tables ...
    FoldTablesChecked.store(true, std::memory_order_relaxed);
  }
  const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
  if (Data != Table.end() && Data->KeyOp == RegOp &&
      !(Data->Flags & TB_NO_FORWARD)) // skip entries that never fold forward
    return Data;
  return nullptr;
}
const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp,
                                                        unsigned OpNum) {
  ArrayRef<X86FoldTableEntry> FoldTable;
  if (OpNum == 1)
    FoldTable = ArrayRef(BroadcastTable1);
  else if (OpNum == 2)
    FoldTable = ArrayRef(BroadcastTable2);
  else if (OpNum == 3)
    FoldTable = ArrayRef(BroadcastTable3);
  else if (OpNum == 4)
    FoldTable = ArrayRef(BroadcastTable4);
  else
    return nullptr;
  return lookupFoldTableImpl(FoldTable, RegOp);
}
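// Illustrative use of the dispatch above: OpNum selects which per-arity table
// to search, so a caller folding a broadcast into operand 2 of an instruction
// might do something like the following (SomeRegOpcode and rewriteToOpcode are
// placeholders, not real names from this file):
//   if (const X86FoldTableEntry *E = lookupBroadcastFoldTable(SomeRegOpcode, 2))
//     rewriteToOpcode(E->DstOp, E->Flags);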
// Reverse-lookup table keyed by the memory-form opcode, built from the fold
// tables, sorted once at construction.
struct X86MemUnfoldTable {
  std::vector<X86FoldTableEntry> Table;
  X86MemUnfoldTable() {
    // ... entries appended with the memory opcode as key, then sorted ...
    assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
           "Memory unfolding table is not unique!");
  }
};
const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) {
  static X86MemUnfoldTable MemUnfoldTable;
  auto &Table = MemUnfoldTable.Table;
  auto I = llvm::lower_bound(Table, MemOp);
  if (I != Table.end() && I->KeyOp == MemOp)
    return &*I;
  return nullptr;
}
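// Sketch of how a reverse ("unfold") table can be derived from a fold table:
// swap key and value so the memory-form opcode becomes the key, then sort so
// the same lower_bound lookup works. This reuses the illustrative FoldEntry
// type from the sketch above; buildUnfoldTable is likewise invented for
// illustration and only mirrors what X86MemUnfoldTable's constructor does.
#include <algorithm>
#include <vector>

std::vector<FoldEntry> buildUnfoldTable(const FoldEntry *Begin,
                                        const FoldEntry *End) {
  std::vector<FoldEntry> Unfold;
  for (const FoldEntry *I = Begin; I != End; ++I)
    Unfold.push_back({I->DstOp, I->KeyOp, I->Flags}); // memory op becomes key
  std::sort(Unfold.begin(), Unfold.end());            // required by lower_bound
  return Unfold;
}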
// Table mapping a memory-form opcode to the matching broadcast-form opcode.
// It is built by chaining each reg->broadcast entry through the reg->memory
// fold tables, so lookups can be keyed on the memory opcode.
struct X86BroadcastFoldTable {
  // Stores memory broadcast folding table entries, keyed by memory opcode.
  std::vector<X86FoldTableEntry> Table;

  X86BroadcastFoldTable() {
    // Two-operand broadcast tables.
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        // ... Flags merges Reg2Mem->Flags with Reg2Bcst.Flags ...
        Table.push_back({MemOp, BcstOp, Flags});
      }
    }
    for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
      unsigned RegOp = Reg2Bcst.KeyOp;
      unsigned BcstOp = Reg2Bcst.DstOp;
      if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
        unsigned MemOp = Reg2Mem->DstOp;
        // ... same as above ...
      }
    }
    // The three- and four-operand broadcast tables are walked the same way,
    // after which Table is sorted by memory opcode.
  }
};
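// Sketch of the chaining done by the constructor above: each reg->broadcast
// entry is joined with the matching reg->memory entry so the resulting table
// can be keyed on the memory-form opcode. Illustrative code only, using the
// simplified FoldEntry/lookupEntry stand-ins introduced earlier; flag merging
// is reduced to a plain OR here.
void appendBroadcastEntries(const FoldEntry *BcstBegin, const FoldEntry *BcstEnd,
                            const FoldEntry *MemBegin, const FoldEntry *MemEnd,
                            std::vector<FoldEntry> &Out) {
  for (const FoldEntry *Reg2Bcst = BcstBegin; Reg2Bcst != BcstEnd; ++Reg2Bcst) {
    // Reg2Bcst: RegOp -> BcstOp. Find the plain memory fold RegOp -> MemOp.
    if (const FoldEntry *Reg2Mem =
            lookupEntry(MemBegin, MemEnd, Reg2Bcst->KeyOp)) {
      uint16_t Flags = static_cast<uint16_t>(Reg2Mem->Flags | Reg2Bcst->Flags);
      Out.push_back({Reg2Mem->DstOp, Reg2Bcst->DstOp, Flags}); // MemOp -> BcstOp
    }
  }
  // The caller sorts Out by its key once all tables have been appended.
}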
static bool matchBroadcastSize(const X86FoldTableEntry &Entry, unsigned BroadcastBits) {
  switch (Entry.Flags & TB_BCAST_MASK) {
  case TB_BCAST_SH:                  return BroadcastBits == 16;
  case TB_BCAST_SS: case TB_BCAST_D: return BroadcastBits == 32;
  case TB_BCAST_SD: case TB_BCAST_Q: return BroadcastBits == 64;
  }
  return false;
}
const X86FoldTableEntry *
llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) {
  static X86BroadcastFoldTable BroadcastFoldTable;
  auto &Table = BroadcastFoldTable.Table;
  for (auto I = llvm::lower_bound(Table, MemOp);
       I != Table.end() && I->KeyOp == MemOp; ++I) {
    if (matchBroadcastSize(*I, BroadcastBits))
      return &*I;
  }
  return nullptr;
}
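// Sketch of the by-size lookup above: entries sharing a key are adjacent once
// the table is sorted, so the scan starts at lower_bound and walks the run
// until the key changes, returning the first entry whose broadcast width
// matches. lookupBySize and the Matches predicate are illustrative stand-ins
// built on the simplified FoldEntry type from the earlier sketch.
const FoldEntry *lookupBySize(const std::vector<FoldEntry> &Table,
                              unsigned MemOp, unsigned BroadcastBits,
                              bool (*Matches)(const FoldEntry &, unsigned)) {
  auto I = std::lower_bound(Table.begin(), Table.end(), FoldEntry{MemOp, 0, 0});
  for (; I != Table.end() && I->KeyOp == MemOp; ++I)
    if (Matches(*I, BroadcastBits))
      return &*I;
  return nullptr;
}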