HexagonBitTracker.cpp
HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
                                   MachineRegisterInfo &mri,
                                   const HexagonInstrInfo &tii,
                                   MachineFunction &mf)
    : MachineEvaluator(tri, mri), MF(mf), MFI(mf.getFrameInfo()), TII(tii) {
  // Record the extension type (sign/zero) of each formal argument that is
  // passed in a register.
  unsigned InVirtReg, InPhysReg = 0;
  // ...
  if (Arg.hasAttribute(Attribute::ByVal))
    continue;
  InPhysReg = getNextPhysReg(InPhysReg, Width);
  // ...
  InVirtReg = getVirtRegFor(InPhysReg);
  // ...
  if (Arg.hasAttribute(Attribute::SExt))
    VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width)));
  else if (Arg.hasAttribute(Attribute::ZExt))
    VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width)));
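// Illustrative sketch (standalone model, not this file's API): what the VRX
// map encodes — each incoming vreg is known to be the sign- or zero-extension
// of a Width-bit value. Names below (ExtKind, applyExt) are hypothetical.
#include <cstdint>

enum class ExtKind { SExt, ZExt };
struct Ext { ExtKind Kind; unsigned Width; };

// Recover the known upper bits of a raw 64-bit register value.
inline uint64_t applyExt(uint64_t V, Ext E) {
  if (E.Width >= 64)
    return V;
  uint64_t Lo = V & ((uint64_t(1) << E.Width) - 1);
  if (E.Kind == ExtKind::ZExt)
    return Lo;                          // upper bits are known zeros
  uint64_t Sign = uint64_t(1) << (E.Width - 1);
  return (Lo ^ Sign) - Sign;            // upper bits replicate the sign bit
}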
BT::BitMask HexagonEvaluator::mask(Register Reg, unsigned Sub) const {
  if (Sub == 0)
    return MachineEvaluator::mask(Reg, 0);
  // ...
  switch (ID) {
    case Hexagon::DoubleRegsRegClassID:
    case Hexagon::HvxWRRegClassID:
    case Hexagon::HvxVQRRegClassID:
      return IsSubLo ? BT::BitMask(0, RW-1) : BT::BitMask(RW, 2*RW-1);
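// Illustrative sketch (standalone, not LLVM's BT::BitMask): the mask returned
// above — the low subregister covers bits [0, RW-1] of the 2*RW-bit
// superregister, the high subregister bits [RW, 2*RW-1].
struct Mask { unsigned First, Last; };

inline Mask subregMask(unsigned RW, bool IsSubLo) {
  return IsSubLo ? Mask{0, RW - 1} : Mask{RW, 2 * RW - 1};
}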
uint16_t HexagonEvaluator::getPhysRegBitWidth(MCRegister Reg) const {
  using namespace Hexagon;
  // ...
  if (HST.useHVXOps()) {
    for (auto &RC : {HvxVRRegClass, HvxWRRegClass, HvxQRRegClass,
                     HvxVQRRegClass})
      if (RC.contains(Reg))
        return TRI.getRegSizeInBits(RC);
  }
const TargetRegisterClass &HexagonEvaluator::composeWithSubRegIndex(
      const TargetRegisterClass &RC, unsigned Idx) const {
  // ...
  assert(IsSubLo != IsSubHi && "Must refer to either low or high subreg");
  // ...
  switch (RC.getID()) {
    case Hexagon::DoubleRegsRegClassID:
      return Hexagon::IntRegsRegClass;
    case Hexagon::HvxWRRegClassID:
      return Hexagon::HvxVRRegClass;
    case Hexagon::HvxVQRRegClassID:
      return Hexagon::HvxWRRegClass;
  }
  // Unexpected register class/subregister combination:
  dbgs() << "Reg class id: " << RC.getID() << " idx: " << Idx << '\n';
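// Illustrative sketch (hypothetical enum, not LLVM's types): the mapping above
// simply halves the class — a subregister of a 64-bit pair is a 32-bit IntReg,
// of an HVX pair a single vector, of an HVX quad a pair.
enum class HexClass { IntRegs, DoubleRegs, HvxVR, HvxWR, HvxVQR };

inline HexClass halfClass(HexClass RC) {
  switch (RC) {
    case HexClass::DoubleRegs: return HexClass::IntRegs; // 64 -> 32 bits
    case HexClass::HvxWR:      return HexClass::HvxVR;   // pair -> vector
    case HexClass::HvxVQR:     return HexClass::HvxWR;   // quad -> pair
    default:                   return RC;                // no subregisters
  }
}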
  std::vector<BT::RegisterRef> Vector;
  // ...
  for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
bool HexagonEvaluator::evaluate(const MachineInstr &MI,
                                const CellMapType &Inputs,
                                CellMapType &Outputs) const {
  using namespace Hexagon;
  // ...
  unsigned NumDefs = 0;
  // ...
  if (!MO.isReg() || !MO.isDef())
    continue;
  assert(MO.getSubReg() == 0);
  // ...
  unsigned Opc = MI.getOpcode();
  // ...
  if (MI.mayLoad())
    return evaluateLoad(MI, Inputs, Outputs);
  // ...
  if (evaluateFormalCopy(MI, Inputs, Outputs))
    return true;
  // ...
  if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() ||
      MO.isCPI())
    return false;
  RegisterRefs Reg(MI);
#define op(i) MI.getOperand(i)
#define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs))
#define im(i) MI.getOperand(i).getImm()
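// Note: these shorthands keep each opcode case to one line. For example
// (expansion shown only for illustration), the A2_add case below,
//     return rr0(eADD(rc(1), rc(2)), Outputs);
// expands to
//     return rr0(eADD(RegisterCell::ref(getCell(Reg[1], Inputs)),
//                     RegisterCell::ref(getCell(Reg[2], Inputs))),
//                Outputs);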
  auto cop = [this, &Reg, &MI, &Inputs](unsigned N,
                                        uint16_t W) -> BT::RegisterCell {
    // ...
  };
  // Extract the RW low bits of a cell (the "lo" helper):
    return eXTR(RC, 0, RW);
  // Extract the N-th halfword of a cell (the "half" helper):
    return eXTR(RC, N*16, N*16+16);
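// Illustrative sketch (standalone model, not LLVM's RegisterCell): eXTR(RC, B, E)
// yields the bit slice [B, E) of a cell, so "half" above is the N-th 16-bit
// slice. Modeling a cell as one entry per bit, LSB first:
#include <cassert>
#include <vector>

using BitCell = std::vector<int>;

inline BitCell extractBits(const BitCell &RC, unsigned B, unsigned E) {
  assert(B <= E && E <= RC.size());
  return BitCell(RC.begin() + B, RC.begin() + E);
}
// Halfword N of RC is then extractBits(RC, N*16, N*16 + 16).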
  unsigned Reg0 = Reg[0].Reg;
  // ...
    return rr0(eIMM(im(1), W0), Outputs);
  // ...
    int FI = op(1).getIndex();
    int Off = op(2).getImm();
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(rc(1), Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(eINS(RC, eXTR(rc(1), 0, PW), 0), Outputs);
  // ...
    assert(W0 == 64 && W1 == 32);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(eADD(rc(1), rc(2)), Outputs);
  case S4_addi_asl_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  case S4_addi_lsr_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(RC, Outputs);
  case M4_mpyri_addi: {
    // ...
    return rr0(RC, Outputs);
  }
  case M4_mpyrr_addi: {
    // ...
    return rr0(RC, Outputs);
  }
  case M4_mpyri_addr_u2: {
    // ...
    return rr0(RC, Outputs);
  }
  case M4_mpyri_addr: {
    // ...
    return rr0(RC, Outputs);
  }
  case M4_mpyrr_addr: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  case S2_addasl_rrri: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(eADD(RPC, eIMM(im(2), W0)), Outputs);
  // ...
    return rr0(eSUB(rc(1), rc(2)), Outputs);
  case S4_subi_asl_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  case S4_subi_lsr_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs);
  // ...
    return rr0(hi(M, W0), Outputs);
  // ...
    return rr0(eMLS(rc(1), rc(2)), Outputs);
  case M2_dpmpyss_acc_s0:
  // ...
  case M2_dpmpyss_nac_s0:
  // ...
    return rr0(lo(M, W0), Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(lo(M, 32), Outputs);
  // ...
    return rr0(lo(M, 32), Outputs);
  // ...
    return rr0(lo(M, 32), Outputs);
  // ...
    return rr0(hi(M, W0), Outputs);
  // ...
    return rr0(eMLU(rc(1), rc(2)), Outputs);
  case M2_dpmpyuu_acc_s0:
  // ...
  case M2_dpmpyuu_nac_s0:
  // ...
    return rr0(eAND(rc(1), rc(2)), Outputs);
  case S4_andi_asl_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  case S4_andi_lsr_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(eORL(rc(1), rc(2)), Outputs);
  case S4_ori_asl_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  case S4_ori_lsr_ri: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(eXOR(rc(1), rc(2)), Outputs);
  // ...
    return rr0(eNOT(rc(1)), Outputs);
  // ...
    return rr0(eASL(rc(1), im(2)), Outputs);
  // ...
    return rr0(eASL(rc(1), 16), Outputs);
  case S2_asl_i_r_xacc:
  case S2_asl_i_p_xacc:
  // ...
    return rr0(eASR(rc(1), im(2)), Outputs);
  // ...
    return rr0(eASR(rc(1), 16), Outputs);
  // ...
  case S2_asr_i_r_rnd: {
    // ...
    return rr0(eXTR(RC, 0, W0), Outputs);
  }
  case S2_asr_i_r_rnd_goodsyntax: {
    // ...
      return rr0(rc(1), Outputs);
    // ...
    return rr0(eXTR(RC, 0, W0), Outputs);
  }
  // ...
  case S2_asr_i_svw_trun:
  // ...
    return rr0(eLSR(rc(1), im(2)), Outputs);
  // ...
  case S2_lsr_i_r_xacc:
  case S2_lsr_i_p_xacc:
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  case S2_togglebit_i: {
    // ...
    return rr0(RC, Outputs);
  }
  // ...
        .fill(W1+(W1-BX), W0, Zero);
    return rr0(RC, Outputs);
  // ...
    return rr0(eIMM(0, W0), Outputs);
  // ...
    if (Opc == S2_extractu || Opc == S2_extractup)
      return rr0(eZXT(RC, Wd), Outputs);
    return rr0(eSXT(RC, Wd), Outputs);
  // ...
    assert(Wd < W0 && Of < W0);
  // ...
      return rr0(rc(1), Outputs);
    return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs);
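// Illustrative sketch (same standalone bit-cell model as above): the S2_insert
// case writes the low Wd bits of one cell into another at offset Of — i.e.
// eINS composed with eXTR.
inline BitCell insertBits(BitCell A1, const BitCell &A2, unsigned AtN) {
  assert(AtN + A2.size() <= A1.size());
  for (unsigned i = 0; i != A2.size(); ++i)
    A1[AtN + i] = A2[i];    // overwrite bits [AtN, AtN+|A2|) of A1
  return A1;
}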
    return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs);
  // ...
  case A2_combine_hh: {
    // LoH: which halfword of the second source feeds the low output half.
    unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl);
    // HiH: which halfword of the first source feeds the high output half.
    unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh);
    // ...
    return rr0(RC, Outputs);
  }
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(RC, Outputs);
  // ...
    assert(WR == 64 && WP == 8);
  // ...
    return rr0(RC, Outputs);
  // ...
    if (PC0.is(0) || PC0.is(1))
      // ...
    return rr0(R2, Outputs);
  // Sign- and zero-extensions (sxtb/sxth, zxtb/zxth):
    return rr0(eSXT(rc(1), 8), Outputs);
  // ...
    return rr0(eSXT(rc(1), 16), Outputs);
  // ...
    assert(W0 == 64 && W1 == 32);
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(eZXT(rc(1), 8), Outputs);
  // ...
    return rr0(eZXT(rc(1), 16), Outputs);
  // Count leading bits (eCLB) and trailing bits (eCTB):
    return rr0(eCLB(rc(1), false, 32), Outputs);
  // ...
    return rr0(eCLB(rc(1), true, 32), Outputs);
  // ...
    if (TV.is(0) || TV.is(1))
      return rr0(eCLB(R1, TV, 32), Outputs);
  // ...
    return rr0(eCTB(rc(1), false, 32), Outputs);
  // ...
    return rr0(eCTB(rc(1), true, 32), Outputs);
  // ...
    bool Has0 = false, All1 = true;
  // ...
    return rr0(RC, Outputs);
  // ...
    bool Has1 = false, All0 = true;
  // ...
    return rr0(RC, Outputs);
  // ...
    return rr0(eAND(rc(1), rc(2)), Outputs);
  // ...
    return rr0(eNOT(rc(1)), Outputs);
  // ...
    return rr0(eORL(rc(1), rc(2)), Outputs);
  // ...
    return rr0(eXOR(rc(1), rc(2)), Outputs);
  // ...
    if (V.is(0) || V.is(1)) {
      // ...
      bool TV = (Opc == S2_tstbit_i);
  // ...
  if (unsigned DefR = getUniqueDefVReg(MI)) {
  // ...
  return MachineEvaluator::evaluate(MI, Inputs, Outputs);
bool HexagonEvaluator::evaluate(const MachineInstr &BI,
                                const CellMapType &Inputs,
                                BranchTargetList &Targets,
                                bool &FallsThru) const {
  // ...
  bool SimpleBranch = false;
  bool Negated = false;
  switch (Opc) {
    case Hexagon::J2_jumpf:
    case Hexagon::J2_jumpfpt:
    case Hexagon::J2_jumpfnew:
    case Hexagon::J2_jumpfnewpt:
      Negated = true;
      LLVM_FALLTHROUGH;
    case Hexagon::J2_jumpt:
    case Hexagon::J2_jumptpt:
    case Hexagon::J2_jumptnew:
    case Hexagon::J2_jumptnewpt:
      SimpleBranch = true;
      break;
    case Hexagon::J2_jump:
      Targets.insert(BI.getOperand(0).getMBB());
      FallsThru = false;
      return true;
    // ...
  }
  // ...
  if (!Test.is(!Negated)) {
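// Illustrative sketch (standalone, not the LLVM API): the decision made above.
// Once the predicate's bit 0 is a known constant, a conditional jump either
// always jumps or always falls through; the J2_jumpf forms test the negation.
struct BranchOutcome { bool Taken, FallsThru; };

inline BranchOutcome foldBranch(bool PredBit0, bool Negated) {
  bool Taken = (PredBit0 != Negated);
  return { Taken, !Taken };
}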
unsigned HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const {
  unsigned DefReg = 0;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isDef())
      continue;
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
                                    const CellMapType &Inputs,
                                    CellMapType &Outputs) const {
  using namespace Hexagon;
  // ...
  assert(MI.mayLoad() && "A load that mayn't?");
  unsigned Opc = MI.getOpcode();
  // ...
  switch (Opc) {
    // ...
    case L2_loadalignb_pbr:
    case L2_loadalignb_pcr:
    case L2_loadalignb_pi:
    // ...
    case L2_loadalignh_pbr:
    case L2_loadalignh_pcr:
    case L2_loadalignh_pi:
    // ...
    case L2_loadbsw2_pbr:
    case L2_loadbsw2_pci:
    case L2_loadbsw2_pcr:
    case L2_loadbsw2_pi:
    case L2_loadbsw4_pbr:
    case L2_loadbsw4_pci:
    case L2_loadbsw4_pcr:
    case L2_loadbsw4_pi:
    // ...
    case L2_loadbzw2_pbr:
    case L2_loadbzw2_pci:
    case L2_loadbzw2_pcr:
    case L2_loadbzw2_pi:
    case L2_loadbzw4_pbr:
    case L2_loadbzw4_pci:
    case L2_loadbzw4_pcr:
    case L2_loadbzw4_pi:
    // ...
    case L2_loadrub_pbr:
    case L2_loadrub_pci:
    case L2_loadrub_pcr:
    // ...
    case L2_loadruh_pbr:
    case L2_loadruh_pci:
    case L2_loadruh_pcr:
    // ...
    case L2_loadw_locked:
    // ...
    case L4_loadd_locked:
    // ...
  }
  // ...
  assert(W >= BitNum && BitNum > 0);
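// Illustrative sketch (standalone, not LLVM's types): what (BitNum, W) mean
// above. A load produces BitNum unknown bits; the remaining W-BitNum bits of
// the destination cell are filled by the extension the opcode implies.
#include <string>

inline std::string loadedCell(unsigned BitNum, unsigned W, bool SignExt) {
  std::string Cell(BitNum, 'u');            // 'u' = bit comes from memory
  Cell.append(W - BitNum, SignExt ? 's' : '0'); // assumes W >= BitNum, per the assert
  return Cell;                              // LSB first; e.g. loadrub: 8 x 'u' + 24 x '0'
}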
bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
                                          const CellMapType &Inputs,
                                          CellMapType &Outputs) const {
  // ...
  if (F->second.Type == ExtType::SExt)
    // ... (sign-extend the copied cell from the argument's width)
  else if (F->second.Type == ExtType::ZExt)
    // ... (zero-extend it)
unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
  using namespace Hexagon;

  bool Is64 = DoubleRegsRegClass.contains(PReg);
  assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));

  static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 };
  static const unsigned Phys64[] = { D0, D1, D2 };
  const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned);
  const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned);

  // Return the first parameter register of the required width.
  if (PReg == 0)
    return (Width <= 32) ? Phys32[0] : Phys64[0];

  // Find the position of PReg in its list, then return the next register of
  // the requested width that does not overlap it.
  unsigned Idx32 = 0, Idx64 = 0;
  if (!Is64) {
    while (Idx32 < Num32) {
      if (Phys32[Idx32] == PReg)
        break;
      Idx32++;
    }
    Idx64 = Idx32/2;
  } else {
    while (Idx64 < Num64) {
      if (Phys64[Idx64] == PReg)
        break;
      Idx64++;
    }
    Idx32 = Idx64*2+1;
  }

  if (Width <= 32)
    return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0;
  return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
}
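// Worked example, derived from the code above: Hexagon passes the first
// scalar arguments in R0-R5, a 64-bit value taking an aligned pair
// (D0 = R1:R0, D1 = R3:R2, D2 = R5:R4):
//   getNextPhysReg(0,  32) == R0
//   getNextPhysReg(R0, 64) == D1   // D0 overlaps the used R0, skip it
//   getNextPhysReg(D1, 32) == R4   // next 32-bit reg after R3:R2
//   getNextPhysReg(R5, 32) == 0    // out of argument registers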
unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
  // Linear search of the function's live-in (physreg, vreg) pairs.
  for (std::pair<unsigned,unsigned> P : MRI.liveins())
    if (P.first == PReg)
      return P.second;
  return 0;
}