LLVM 22.0.0git
HexagonGenWideningVecInstr.cpp
Go to the documentation of this file.
1//===--------------------- HexagonGenWideningVecInstr.cpp -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Replace widening vector operations with hexagon intrinsics.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/APInt.h"
15#include "llvm/IR/BasicBlock.h"
16#include "llvm/IR/Constants.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/Instruction.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23#include "llvm/IR/Type.h"
24#include "llvm/IR/Value.h"
26#include "llvm/Pass.h"
28#include <algorithm>
29#include <utility>
30
31using namespace llvm;
32
33// A command line argument to enable the generation of widening instructions
34// for short-vectors.
36 "hexagon-widen-short-vector",
37 cl::desc("Generate widening instructions for short vectors."), cl::Hidden);
38
39namespace llvm {
42} // end namespace llvm
43
44namespace {
45
46class HexagonGenWideningVecInstr : public FunctionPass {
47public:
48 static char ID;
49
50 HexagonGenWideningVecInstr() : FunctionPass(ID) {
52 }
53
54 HexagonGenWideningVecInstr(const HexagonTargetMachine *TM)
55 : FunctionPass(ID), TM(TM) {
57 }
58
59 StringRef getPassName() const override {
60 return "Hexagon generate widening vector instructions";
61 }
62
63 bool runOnFunction(Function &F) override;
64
65 void getAnalysisUsage(AnalysisUsage &AU) const override {
66 FunctionPass::getAnalysisUsage(AU);
67 }
68
69private:
70 Module *M = nullptr;
71 const HexagonTargetMachine *TM = nullptr;
72 const HexagonSubtarget *HST = nullptr;
73 unsigned HwVLen;
74 enum OPKind { OP_None = 0, OP_Add, OP_Sub, OP_Mul, OP_Shl };
75
76 struct OPInfo {
77 Value *OP = nullptr;
78 Value *ExtInOP = nullptr;
79 bool IsZExt = false;
80 unsigned ExtInSize = 0;
81 bool IsScalar = false;
82 };
83
84 bool visitBlock(BasicBlock *B);
85 bool processInstruction(Instruction *Inst);
86 bool replaceWithIntrinsic(Instruction *Inst, OPKind OPK, OPInfo &OP1Info,
87 OPInfo &OP2Info);
88 bool getOperandInfo(Value *V, OPInfo &OPI);
89 bool isExtendedConstant(Constant *C, bool IsSigned);
90 unsigned getElementSizeInBits(Value *V, bool IsZExt);
91 Type *getElementTy(unsigned size, IRBuilder<> &IRB);
92
93 Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
94 unsigned NewEltsize, unsigned NumElts);
95
96 Intrinsic::ID getIntrinsic(OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt,
97 unsigned NewOpEltSize, unsigned NewResEltSize,
98 bool IsConstScalar, int ConstOpNum);
99
100 std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst,
101 Type *NewOpType);
102
103 Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
104 Value *NewOP2, Type *ResType, unsigned NumElts,
105 bool Interleave);
106 bool processInstructionForVMPA(Instruction *Inst);
107 bool getVmpaOperandInfo(Value *V, OPInfo &OPI);
108 void reorderVmpaOperands(OPInfo *OPI);
109 bool replaceWithVmpaIntrinsic(Instruction *Inst, OPInfo *OPI);
110 bool genSaturatingInst(Instruction *Inst);
111 bool getMinMax(Constant *MinC, Constant *MaxC, std::pair<int, int> &MinMax);
112 bool isSaturatingVAsr(Instruction *Inst, Value *S, int MinV, int MaxV,
113 bool &IsResSigned);
114 Value *extendShiftByVal(Value *ShiftByVal, IRBuilder<> &IRB);
115 Intrinsic::ID getVAsrIntrinsic(bool IsInSigned, bool IsResSigned);
116 Value *createVAsrIntrinsic(Instruction *Inst, Value *VecOP, Value *ShiftByVal,
117 bool IsResSigned);
118 bool genVAvg(Instruction *Inst);
119 bool checkConstantVector(Value *OP, int64_t &SplatVal, bool IsOPZExt);
120 void updateMPYConst(Intrinsic::ID IntId, int64_t &SplatVal, bool IsOPZExt,
121 Value *&OP, IRBuilder<> &IRB);
122 void packConstant(Intrinsic::ID IntId, int64_t &SplatVal, Value *&OP,
123 IRBuilder<> &IRB);
124};
125
126} // end anonymous namespace
127
128char HexagonGenWideningVecInstr::ID = 0;
129
130INITIALIZE_PASS_BEGIN(HexagonGenWideningVecInstr, "widening-vec",
131 "Hexagon generate "
132 "widening vector instructions",
133 false, false)
135INITIALIZE_PASS_END(HexagonGenWideningVecInstr, "widening-vec",
136 "Hexagon generate "
137 "widening vector instructions",
139
141 if (Value *SplatV = C->getSplatValue()) {
142 auto *CI = dyn_cast<ConstantInt>(SplatV);
143 assert(CI);
144 return CI->getValue().isNegative();
145 }
146 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
147 for (unsigned i = 0, e = NumElts; i != e; ++i) {
148 auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i));
149 assert(CI);
150 if (CI->getValue().isNegative())
151 return true;
152 continue;
153 }
154 return false;
155}
156
157bool HexagonGenWideningVecInstr::getOperandInfo(Value *V, OPInfo &OPI) {
158 using namespace PatternMatch;
159 OPI.OP = V;
160 Value *ExtV = nullptr;
161 Constant *C = nullptr;
162
163 bool Match = false;
164 if ((Match = (match(V, (m_ZExt(m_Value(ExtV)))) ||
166 m_ZExt(m_Value(ExtV)), m_Zero()),
167 m_Poison(), m_ZeroMask()))))) {
168 OPI.ExtInOP = ExtV;
169 OPI.IsZExt = true;
170 }
171
172 if (!Match &&
173 (Match = (match(V, (m_SExt(m_Value(ExtV)))) ||
175 m_SExt(m_Value(ExtV)), m_Zero()),
176 m_Poison(), m_ZeroMask()))))) {
177 OPI.ExtInOP = ExtV;
178 OPI.IsZExt = false;
179 }
180 if (!Match &&
181 (Match =
183 m_Poison(), m_ZeroMask()))))) {
184 if (match(ExtV, m_And(m_Value(), m_SpecificInt(255)))) {
185 OPI.ExtInOP = ExtV;
186 OPI.IsZExt = true;
187 OPI.ExtInSize = 8;
188 return true;
189 }
190 if (match(ExtV, m_And(m_Value(), m_SpecificInt(65535)))) {
191 OPI.ExtInOP = ExtV;
192 OPI.IsZExt = true;
193 OPI.ExtInSize = 16;
194 return true;
195 }
196 return false;
197 }
198
199 if (!Match && (Match = match(V, m_Constant(C)))) {
200 if (!isExtendedConstant(C, false) && !isExtendedConstant(C, true))
201 return false;
202 OPI.ExtInOP = C;
203 OPI.IsZExt = !hasNegativeValues(C);
204 }
205
206 if (!Match)
207 return false;
208
209 // If the operand is extended, find the element size of its input.
210 if (OPI.ExtInOP)
211 OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
212 return true;
213}
214
215bool HexagonGenWideningVecInstr::isExtendedConstant(Constant *C,
216 bool IsSigned) {
217 Type *CTy = cast<FixedVectorType>(C->getType())->getElementType();
218 unsigned EltSize = CTy->getPrimitiveSizeInBits();
219 unsigned HalfSize = EltSize / 2;
220 if (Value *SplatV = C->getSplatValue()) {
221 if (auto *CI = dyn_cast<ConstantInt>(SplatV))
222 return IsSigned ? isIntN(HalfSize, CI->getSExtValue())
223 : isUIntN(HalfSize, CI->getZExtValue());
224 return false;
225 }
226 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
227 for (unsigned i = 0, e = NumElts; i != e; ++i) {
228 if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) {
229 if ((IsSigned && !isIntN(HalfSize, CI->getSExtValue())) ||
230 (!IsSigned && !isUIntN(HalfSize, CI->getZExtValue())))
231 return false;
232 continue;
233 }
234 return false;
235 }
236 return true;
237}
238
239unsigned HexagonGenWideningVecInstr::getElementSizeInBits(Value *V,
240 bool IsZExt = false) {
241 using namespace PatternMatch;
242 Type *ValTy = V->getType();
243 Type *EltTy = ValTy;
244 if (auto *C = dyn_cast<Constant>(V)) {
245 unsigned NumElts = cast<FixedVectorType>(EltTy)->getNumElements();
246 unsigned EltSize = cast<FixedVectorType>(EltTy)
247 ->getElementType()
248 ->getPrimitiveSizeInBits()
249 .getKnownMinValue();
250 unsigned ReducedSize = EltSize / 2;
251
252 while (ReducedSize >= 8) {
253 for (unsigned i = 0, e = NumElts; i != e; ++i) {
254 if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) {
255 if (IsZExt) {
256 if (!isUIntN(ReducedSize, CI->getZExtValue()))
257 return EltSize;
258 } else if (!isIntN(ReducedSize, CI->getSExtValue()))
259 return EltSize;
260 }
261 }
262 EltSize = ReducedSize;
263 ReducedSize = ReducedSize / 2;
264 }
265 return EltSize;
266 }
267
268 if (ValTy->isVectorTy())
269 EltTy = cast<FixedVectorType>(ValTy)->getElementType();
270 return EltTy->getPrimitiveSizeInBits();
271}
272
273Value *HexagonGenWideningVecInstr::adjustExtensionForOp(OPInfo &OPI,
274 IRBuilder<> &IRB,
275 unsigned NewExtSize,
276 unsigned NumElts) {
277 Value *V = OPI.ExtInOP;
278 bool IsZExt = OPI.IsZExt;
279 unsigned EltSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
280 Type *EltType = getElementTy(NewExtSize, IRB);
281 auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
282
283 if (dyn_cast<Constant>(V))
284 return IRB.CreateTrunc(V, NewOpTy);
285
286 if (V->getType()->isVectorTy()) {
287 if (NewExtSize == EltSize)
288 return V;
289 assert(NewExtSize == 16);
290 auto *NewOpTy = FixedVectorType::get(IRB.getInt16Ty(), NumElts);
291 return (IsZExt) ? IRB.CreateZExt(V, NewOpTy) : IRB.CreateSExt(V, NewOpTy);
292 }
293
294 // The operand must correspond to a shuffle vector which is used to construct
295 // a vector out of a scalar. Since the scalar value (V) is extended,
296 // replace it with a new shuffle vector with the smaller element size.
297 [[maybe_unused]] auto *I = dyn_cast<Instruction>(OPI.OP);
298 assert(I && I->getOpcode() == Instruction::ShuffleVector);
299
300 if (NewExtSize > EltSize)
301 V = (IsZExt) ? IRB.CreateZExt(V, EltType) : IRB.CreateSExt(V, EltType);
302 else if (NewExtSize < EltSize)
303 V = IRB.CreateTrunc(V, EltType);
304
305 Value *IE =
306 IRB.CreateInsertElement(PoisonValue::get(NewOpTy), V, IRB.getInt32(0));
307
308 SmallVector<Constant *, 8> ShuffleMask;
309 for (unsigned i = 0; i < NumElts; ++i)
310 ShuffleMask.push_back(IRB.getInt32(0));
311
312 return IRB.CreateShuffleVector(IE, PoisonValue::get(NewOpTy),
313 ConstantVector::get(ShuffleMask));
314}
315
// Select the Hexagon widening intrinsic for the operation \p OPK given the
// extension kinds of its two operands, the (common) input element size and
// the result element size. \p IsConstScalar / \p ConstOpNum describe whether
// one operand is a constant splat (and which one), which enables the
// scalar-constant vmpy variants for OP_Mul.
Intrinsic::ID HexagonGenWideningVecInstr::getIntrinsic(
    OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, unsigned InEltSize,
    unsigned ResEltSize, bool IsConstScalar, int ConstOpNum) {
  // Since the operands have been extended, the ResEltSize must be 16 or more.
  switch (OPK) {
  case OP_Add:
    // Both operands should be either zero extended or sign extended.
    assert(IsOP1ZExt == IsOP2ZExt);
    if (InEltSize == 8 && ResEltSize == 16) {
      // Operands must be zero extended as we don't have a widening vector
      // 'add' that can take signed exteded values.
      assert(IsOP1ZExt && "Operands must be zero-extended");
      return Intrinsic::hexagon_vadd_uu;
    }
    if (InEltSize == 16 && ResEltSize == 32)
      return (IsOP1ZExt) ? Intrinsic::hexagon_vadd_uu
                         : Intrinsic::hexagon_vadd_ss;

    llvm_unreachable("Incorrect input and output operand sizes");

  case OP_Sub:
    // Both operands should be either zero extended or sign extended.
    assert(IsOP1ZExt == IsOP2ZExt);
    if (InEltSize == 8 && ResEltSize == 16) {
      // Operands must be zero extended as we don't have a widening vector
      // 'sub' that can take signed exteded values.
      assert(IsOP1ZExt && "Operands must be zero-extended");
      return Intrinsic::hexagon_vsub_uu;
    }
    if (InEltSize == 16 && ResEltSize == 32)
      return (IsOP1ZExt) ? Intrinsic::hexagon_vsub_uu
                         : Intrinsic::hexagon_vsub_ss;

    llvm_unreachable("Incorrect input and output operand sizes");

  case OP_Mul:
    assert(ResEltSize == 2 * InEltSize);
    // Constant-splat operand: prefer the vmpy variants that take the
    // constant as a replicated scalar (see packConstant).
    if (IsConstScalar) {
      // 8-bit inputs, 16-bit result: the non-constant operand must be
      // zero-extended (ub); the constant picks ub_ub vs ub_b by signedness.
      if (InEltSize == 8 && ResEltSize == 16) {
        if (ConstOpNum == 2 && IsOP1ZExt) {
          // Constant is operand 2; its signedness selects the intrinsic.
          return (IsOP2ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
                             : Intrinsic::hexagon_vmpy_ub_b;
        } else if (ConstOpNum == 1 && IsOP2ZExt) {
          // Constant is operand 1; symmetric to the case above.
          return (IsOP1ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
                             : Intrinsic::hexagon_vmpy_ub_b;
        }
      }
      // 16-bit inputs, 32-bit result: only the matching-signedness pairs
      // have scalar-constant intrinsics.
      if (InEltSize == 16 && ResEltSize == 32) {
        if (IsOP1ZExt && IsOP2ZExt) {
          // Both zero-extended.
          return Intrinsic::hexagon_vmpy_uh_uh;
        } else if (!IsOP1ZExt && !IsOP2ZExt) {
          // Both sign-extended.
          return Intrinsic::hexagon_vmpy_h_h;
        }
      }
    }
    // General vector*vector multiply: pick the variant by each operand's
    // extension kind (u = zero-extended, s = sign-extended).
    if (IsOP1ZExt)
      return IsOP2ZExt ? Intrinsic::hexagon_vmpy_uu
                       : Intrinsic::hexagon_vmpy_us;
    else
      return IsOP2ZExt ? Intrinsic::hexagon_vmpy_su
                       : Intrinsic::hexagon_vmpy_ss;
  default:
    llvm_unreachable("Instruction not handled!");
  }
}
393
394Type *HexagonGenWideningVecInstr::getElementTy(unsigned size,
395 IRBuilder<> &IRB) {
396 switch (size) {
397 case 8:
398 return IRB.getInt8Ty();
399 case 16:
400 return IRB.getInt16Ty();
401 case 32:
402 return IRB.getInt32Ty();
403 default:
404 llvm_unreachable("Unhandled Element size");
405 }
406}
407
408Value *HexagonGenWideningVecInstr::createIntrinsic(
409 Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
410 Type *ResType, unsigned NumElts, bool Interleave = true) {
411 IRBuilder<> IRB(Inst);
412 Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId, ResType);
413 Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});
414 if (Interleave) {
415 // Interleave elements in the output vector.
416 SmallVector<Constant *, 16> ShuffleMask;
417 unsigned HalfElts = NumElts / 2;
418 for (unsigned i = 0; i < HalfElts; ++i) {
419 ShuffleMask.push_back(IRB.getInt32(i));
420 ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
421 }
422 NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
423 ConstantVector::get(ShuffleMask));
424 }
425 return NewIn;
426}
427
428std::pair<Value *, Value *>
429HexagonGenWideningVecInstr::opSplit(Value *OP, Instruction *Inst,
430 Type *NewOpType) {
431 Type *InstTy = Inst->getType();
432 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
433 IRBuilder<> IRB(Inst);
434 if (InstTy->getPrimitiveSizeInBits() < 2 * HwVLen) {
435 // The only time we need to split an OP even though it is not a
436 // vector-pair is while generating vasr instruction for the short vector.
437 // Since hi/lo intrinsics can't be used here as they expect the operands to
438 // be of 64xi32 type, the shuffle_vector pair with the appropriate masks is
439 // used instead.
440 assert(NumElts % 2 == 0 && "Unexpected Vector Type!!");
441 unsigned HalfElts = NumElts / 2;
444 for (unsigned i = 0; i < HalfElts; ++i)
445 LoM.push_back(IRB.getInt32(i));
446 for (unsigned i = 0; i < HalfElts; ++i)
447 HiM.push_back(IRB.getInt32(HalfElts + i));
448
449 Value *Hi = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
451 Value *Lo = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
453 return std::pair<Value *, Value *>(Hi, Lo);
454 }
455
456 Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
457 Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
460 auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
461 OP = IRB.CreateBitCast(OP, InType);
462 Value *Hi = IRB.CreateCall(ExtFHi, {OP}); // 32xi32
463 Value *Lo = IRB.CreateCall(ExtFLo, {OP});
464 Hi = IRB.CreateBitCast(Hi, NewOpType);
465 Lo = IRB.CreateBitCast(Lo, NewOpType);
466 return std::pair<Value *, Value *>(Hi, Lo);
467}
468
469bool HexagonGenWideningVecInstr::checkConstantVector(Value *OP,
470 int64_t &SplatVal,
471 bool IsOPZExt) {
472 if (auto *C1 = dyn_cast<Constant>(OP)) {
473 if (Value *SplatV = C1->getSplatValue()) {
474 auto *CI = dyn_cast<ConstantInt>(SplatV);
475 if (IsOPZExt) {
476 SplatVal = CI->getZExtValue();
477 } else {
478 SplatVal = CI->getSExtValue();
479 }
480 return true;
481 }
482 }
483 return false;
484}
485
// For a multiply whose second operand was recognized as a constant splat,
// materialize that constant in the form the selected intrinsic expects:
// the generic vector vmpy variants get a (re-)splatted vector operand,
// while the scalar-constant variants get a packed i32 (see packConstant).
void HexagonGenWideningVecInstr::updateMPYConst(Intrinsic::ID IntId,
                                                int64_t &SplatVal,
                                                bool IsOPZExt, Value *&OP,
                                                IRBuilder<> &IRB) {
  if ((IntId == Intrinsic::hexagon_vmpy_uu ||
       IntId == Intrinsic::hexagon_vmpy_us ||
       IntId == Intrinsic::hexagon_vmpy_su ||
       IntId == Intrinsic::hexagon_vmpy_ss) &&
      OP->getType()->isVectorTy()) {
    // Create a vector with all elements equal to SplatVal.
    // NOTE(review): SplatVal is truncated to 32 bits before being placed in
    // a lane of OP's scalar width — presumably lanes are at most 32 bits
    // here; confirm against the callers.
    auto *VecTy = cast<VectorType>(OP->getType());
    Value *scalar = IRB.getIntN(VecTy->getScalarSizeInBits(),
                                static_cast<uint32_t>(SplatVal));
    Value *splatVector = ConstantVector::getSplat(VecTy->getElementCount(),
                                                  cast<Constant>(scalar));
    // NOTE(review): splatVector already has type VecTy, so this ext is a
    // same-type cast — it looks intended to be folded away by the builder;
    // verify it cannot create an invalid cast.
    OP = IsOPZExt ? IRB.CreateZExt(splatVector, VecTy)
                  : IRB.CreateSExt(splatVector, VecTy);
  } else {
    // Scalar-constant vmpy variants take the constant packed into an i32.
    packConstant(IntId, SplatVal, OP, IRB);
  }
}
507
508void HexagonGenWideningVecInstr::packConstant(Intrinsic::ID IntId,
509 int64_t &SplatVal, Value *&OP,
510 IRBuilder<> &IRB) {
511 uint32_t Val32 = static_cast<uint32_t>(SplatVal);
512 if (IntId == Intrinsic::hexagon_vmpy_ub_ub) {
513 assert(SplatVal >= 0 && SplatVal <= UINT8_MAX);
514 uint32_t packed = (Val32 << 24) | (Val32 << 16) | (Val32 << 8) | Val32;
515 OP = IRB.getInt32(packed);
516 } else if (IntId == Intrinsic::hexagon_vmpy_ub_b) {
517 assert(SplatVal >= INT8_MIN && SplatVal <= INT8_MAX);
518 uint32_t packed = (Val32 << 24) | ((Val32 << 16) & ((1 << 24) - 1)) |
519 ((Val32 << 8) & ((1 << 16) - 1)) |
520 (Val32 & ((1 << 8) - 1));
521 OP = IRB.getInt32(packed);
522 } else if (IntId == Intrinsic::hexagon_vmpy_uh_uh) {
523 assert(SplatVal >= 0 && SplatVal <= UINT16_MAX);
524 uint32_t packed = (Val32 << 16) | Val32;
525 OP = IRB.getInt32(packed);
526 } else if (IntId == Intrinsic::hexagon_vmpy_h_h) {
527 assert(SplatVal >= INT16_MIN && SplatVal <= INT16_MAX);
528 uint32_t packed = (Val32 << 16) | (Val32 & ((1 << 16) - 1));
529 OP = IRB.getInt32(packed);
530 }
531}
532
// Replace the widening binary operation \p Inst (add/sub/mul/shl whose
// operands OP1Info/OP2Info were recognized as extended values) with the
// corresponding Hexagon widening intrinsic, splitting the result across
// vector pairs when needed. Returns true if the instruction was replaced
// (the dead original is left for later cleanup).
bool HexagonGenWideningVecInstr::replaceWithIntrinsic(Instruction *Inst,
                                                      OPKind OPK,
                                                      OPInfo &OP1Info,
                                                      OPInfo &OP2Info) {
  Type *InstTy = Inst->getType();
  Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();

  bool IsOP1ZExt = OP1Info.IsZExt;
  bool IsOP2ZExt = OP2Info.IsZExt;

  // The resulting values of 'add' and 'sub' are always sign-extended; a
  // mul/shl result is zero-extended only when both inputs are.
  bool IsResZExt = (OPK == OP_Mul || OPK == OP_Shl)
                       ? (OP1Info.IsZExt && OP2Info.IsZExt)
                       : false;

  // Operate at the wider of the two pre-extension element sizes; the
  // widening result doubles that.
  unsigned MaxEltSize = std::max(OP1Info.ExtInSize, OP2Info.ExtInSize);
  unsigned NewOpEltSize = MaxEltSize;
  unsigned NewResEltSize = 2 * MaxEltSize;

  // For Add and Sub, both the operands should be either zero extended
  // or sign extended. In case of a mismatch, they are extended to the
  // next size (ex: 8 bits -> 16 bits) so that the sign-extended vadd/vsub
  // instructions can be used. Also, we don't support 8-bits signed vadd/vsub
  // instructions. They are extended to 16-bits and then signed 16-bits
  // non-widening vadd/vsub is used to perform the operation.
  if (OPK != OP_Mul && OPK != OP_Shl &&
      (IsOP1ZExt != IsOP2ZExt || (!IsOP1ZExt && NewOpEltSize == 8)))
    NewOpEltSize = 2 * NewOpEltSize;

  // Bail out on result widths the hardware (or the short-vector option)
  // cannot accommodate.
  unsigned ResVLen = NewResEltSize * NumElts;
  if (ResVLen < HwVLen && !WidenShortVector)
    return false;
  if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
    return false;

  IRBuilder<> IRB(Inst);
  Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
  Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);

  // If the operands had to be widened all the way to the result size (the
  // mismatched-extension case above), there is nothing left to widen.
  if (NewOpEltSize == NewResEltSize) {
    assert(OPK != OP_Mul && OPK != OP_Shl);
    // Instead of intrinsics, use vector add/sub.
    Value *NewIn = IRB.CreateBinOp(cast<BinaryOperator>(Inst)->getOpcode(),
                                   NewOP1, NewOP2);
    if (InstEltSize > NewResEltSize)
      NewIn = IRB.CreateSExt(NewIn, InstTy);
    Inst->replaceAllUsesWith(NewIn);
    return true;
  }

  // Detect a constant-splat operand (mul/shl only) so the scalar-constant
  // vmpy variants can be used. ConstOpNum records which operand it was.
  bool IsConstScalar = false;
  int64_t SplatVal = 0;
  int ConstOpNum = 1;
  if (OPK == OP_Mul || OPK == OP_Shl) {
    IsConstScalar = checkConstantVector(NewOP1, SplatVal, IsOP1ZExt);
    if (!IsConstScalar) {
      IsConstScalar = checkConstantVector(NewOP2, SplatVal, IsOP2ZExt);
      ConstOpNum = 2;
    }
  }

  // A shift-left by an in-range constant is rewritten as a multiply by the
  // corresponding power of two; any other shl form is not handled.
  if (IsConstScalar && OPK == OP_Shl) {
    if (((NewOpEltSize == 8) && (SplatVal > 0) && (SplatVal < 8)) ||
        ((NewOpEltSize == 16) && (SplatVal > 0) && (SplatVal < 16))) {
      SplatVal = 1 << SplatVal;
      OPK = OP_Mul;
    } else {
      return false;
    }
  } else if (!IsConstScalar && OPK == OP_Shl) {
    return false;
  }

  Intrinsic::ID IntId = getIntrinsic(OPK, IsOP1ZExt, IsOP2ZExt, NewOpEltSize,
                                     NewResEltSize, IsConstScalar, ConstOpNum);

  // Repackage the constant operand into the form the intrinsic expects.
  if (IsConstScalar) {
    updateMPYConst(IntId, SplatVal, IsOP2ZExt, NewOP2, IRB);
  }

  // Split the node if it needs more than a vector pair for the result.
  if (ResVLen > 2 * HwVLen) {
    assert(ResVLen == 4 * HwVLen);
    // Split the operands
    unsigned HalfElts = NumElts / 2;
    auto *NewOpType =
        FixedVectorType::get(getElementTy(NewOpEltSize, IRB), HalfElts);
    auto *ResType =
        FixedVectorType::get(getElementTy(NewResEltSize, IRB), HalfElts);
    std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst, NewOpType);
    std::pair<Value *, Value *> SplitOP2;
    // A packed scalar constant (i32) cannot be split; reuse it for both
    // halves.
    if (IsConstScalar && (IntId == Intrinsic::hexagon_vmpy_h_h ||
                          IntId == Intrinsic::hexagon_vmpy_uh_uh)) {
      SplitOP2 = std::pair<Value *, Value *>(NewOP2, NewOP2);
    } else {
      SplitOP2 = opSplit(NewOP2, Inst, NewOpType);
    }
    Value *NewInHi = createIntrinsic(IntId, Inst, SplitOP1.first,
                                     SplitOP2.first, ResType, HalfElts, true);
    Value *NewInLo = createIntrinsic(IntId, Inst, SplitOP1.second,
                                     SplitOP2.second, ResType, HalfElts, true);
    assert(InstEltSize == NewResEltSize);
    SmallVector<Constant *, 8> ShuffleMask;
    for (unsigned i = 0; i < NumElts; ++i)
      ShuffleMask.push_back(IRB.getInt32(i));
    // Concat Hi and Lo.
    Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
                                           ConstantVector::get(ShuffleMask));

    Inst->replaceAllUsesWith(NewIn);
    return true;
  }

  // Single-call case: emit the intrinsic and, if the original instruction
  // was even wider, extend to the original type per the result signedness.
  auto *ResType =
      FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
  Value *NewIn =
      createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);
  if (InstEltSize > NewResEltSize)
    NewIn = (IsResZExt) ? IRB.CreateZExt(NewIn, InstTy)
                        : IRB.CreateSExt(NewIn, InstTy);

  Inst->replaceAllUsesWith(NewIn);

  return true;
}
660
661// Process instruction and replace them with widening vector
662// intrinsics if possible.
663bool HexagonGenWideningVecInstr::processInstruction(Instruction *Inst) {
664 Type *InstTy = Inst->getType();
665 if (!InstTy->isVectorTy() ||
666 cast<FixedVectorType>(InstTy)->getNumElements() > 128)
667 return false;
668 unsigned InstLen = InstTy->getPrimitiveSizeInBits();
669 if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen)
670 return false;
671 if (InstLen < HwVLen && !WidenShortVector)
672 return false;
673
674 using namespace PatternMatch;
675
676 OPKind OPK;
677 Value *OP1 = nullptr, *OP2 = nullptr;
678 if (match(Inst, (m_Sub(m_Value(OP1), m_Value(OP2)))))
679 OPK = OP_Sub;
680 else if (match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
681 OPK = OP_Add;
682 else if (match(Inst, (m_Mul(m_Value(OP1), m_Value(OP2)))))
683 OPK = OP_Mul;
684 else if (match(Inst, (m_Shl(m_Value(OP1), m_Value(OP2)))))
685 OPK = OP_Shl;
686 else
687 return false;
688
689 OPInfo OP1Info, OP2Info;
690
691 if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
692 return false;
693
694 // Proceed only if both input operands are extended.
695 if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
696 return false;
697
698 return replaceWithIntrinsic(Inst, OPK, OP1Info, OP2Info);
699}
700
701bool HexagonGenWideningVecInstr::getVmpaOperandInfo(Value *V, OPInfo &OPI) {
702 using namespace PatternMatch;
703 OPI.OP = V;
704 Value *ExtV, *OP1 = nullptr;
705
706 if (match(V,
708 m_Poison(), m_ZeroMask()))) ||
709 match(V,
711 m_Poison(), m_ZeroMask()))) {
712 OPI.ExtInOP = ExtV;
713 OPI.IsZExt = true;
714 OPI.IsScalar = true;
715 OPI.ExtInSize = ExtV->getType()->getPrimitiveSizeInBits();
716 return true;
717 }
718
719 ConstantInt *I = nullptr;
720 if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
721 m_Poison(), m_ZeroMask())))) {
722 if (match(ExtV, m_And(m_Value(OP1), m_ConstantInt(I)))) {
723 uint32_t IValue = I->getZExtValue();
724 if (IValue <= 255) {
725 OPI.ExtInOP = ExtV;
726 OPI.IsZExt = true;
727 OPI.ExtInSize = 8;
728 OPI.IsScalar = true;
729 return true;
730 }
731 }
732 }
733
734 // Match for non-scalar operands
735 return getOperandInfo(V, OPI);
736}
737
738// Process instruction and replace with the vmpa intrinsic if possible.
739bool HexagonGenWideningVecInstr::processInstructionForVMPA(Instruction *Inst) {
740 using namespace PatternMatch;
741 Type *InstTy = Inst->getType();
742 // TODO: Extend it to handle short vector instructions (< HwVLen).
743 // vmpa instructions produce a vector register pair.
744 if (!InstTy->isVectorTy() || InstTy->getPrimitiveSizeInBits() != 2 * HwVLen)
745 return false;
746
747 Value *OP1 = nullptr, *OP2 = nullptr;
748 if (!match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
749 return false;
750
751 Value *OP[4] = {nullptr, nullptr, nullptr, nullptr};
752 if (!match(OP1, m_Mul(m_Value(OP[0]), m_Value(OP[1]))) ||
753 !match(OP2, m_Mul(m_Value(OP[2]), m_Value(OP[3]))))
754 return false;
755
756 OPInfo OP_Info[4];
757 for (unsigned i = 0; i < 4; i++)
758 if (!getVmpaOperandInfo(OP[i], OP_Info[i]) || !OP_Info[i].ExtInOP)
759 return false;
760
761 return replaceWithVmpaIntrinsic(Inst, OP_Info);
762}
763
764// Reorder operand info in OPI so that the vector operands come before their
765// scalar counterparts.
766void HexagonGenWideningVecInstr::reorderVmpaOperands(OPInfo *OPI) {
767 for (unsigned i = 0; i < 2; i++)
768 if (!OPI[2 * i].ExtInOP->getType()->isVectorTy()) {
769 OPInfo Temp;
770 Temp = OPI[2 * i];
771 OPI[2 * i] = OPI[2 * i + 1];
772 OPI[2 * i + 1] = Temp;
773 }
774}
775
// Only handles the case where one input to vmpa has to be a scalar
// and another is a vector. It can be easily extended to cover
// other types of vmpa instructions.
//
// The two 8-bit scalar multiplicands are packed into one i32 (replicated via
// A2_combine_ll), the two vector multiplicands are combined into a vector
// pair, and a single vmpabus/vmpauhb computes both products and the sum.
bool HexagonGenWideningVecInstr::replaceWithVmpaIntrinsic(Instruction *Inst,
                                                          OPInfo *OPI) {
  reorderVmpaOperands(OPI);

  // After reordering of the operands in OPI, the odd elements must have
  // IsScalar flag set to true. Also, check the even elements for non-scalars.
  if (!OPI[1].IsScalar || !OPI[3].IsScalar || OPI[0].IsScalar ||
      OPI[2].IsScalar)
    return false;

  OPInfo SOPI1 = OPI[1];
  OPInfo SOPI2 = OPI[3];

  // The scalar operand in the vmpa instructions needs to be an int8.
  if (SOPI1.ExtInSize != SOPI2.ExtInSize || SOPI1.ExtInSize != 8)
    return false;

  Type *InstTy = Inst->getType();
  Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();

  // Operate at the wider of the two vector operands' pre-extension sizes;
  // the multiply result doubles that.
  unsigned MaxVEltSize = std::max(OPI[0].ExtInSize, OPI[2].ExtInSize);
  unsigned NewVOpEltSize = MaxVEltSize;
  unsigned NewResEltSize = 2 * MaxVEltSize;

  if (NumElts * NewVOpEltSize < HwVLen) {
    // Extend the operand so that we don't end up with an invalid vector size.
    NewVOpEltSize = 2 * NewVOpEltSize;
    NewResEltSize = 2 * NewResEltSize;
  }

  IRBuilder<> IRB(Inst);

  // Construct scalar operand: pack the two bytes as (S1 << 8) | S2 and
  // replicate the halfword into both halves of an i32 via A2_combine_ll.
  Value *NewSOP1 = SOPI1.ExtInOP;
  Value *NewSOP2 = SOPI2.ExtInOP;

  Type *S1Ty = NewSOP1->getType();
  Type *S2Ty = NewSOP2->getType();
  if (S1Ty->getPrimitiveSizeInBits() < 32)
    NewSOP1 = IRB.CreateZExt(NewSOP1, IRB.getInt32Ty());
  if (S2Ty->getPrimitiveSizeInBits() < 32)
    NewSOP2 = IRB.CreateZExt(NewSOP2, IRB.getInt32Ty());

  Value *SHL = IRB.CreateShl(NewSOP1, IRB.getInt32(8));
  Value *OR = IRB.CreateOr(SHL, NewSOP2);
  Intrinsic::ID CombineIntID = Intrinsic::hexagon_A2_combine_ll;
  Function *ExtF = Intrinsic::getOrInsertDeclaration(M, CombineIntID);
  Value *ScalarOP = IRB.CreateCall(ExtF, {OR, OR});

  // Construct vector operand
  Value *NewVOP1 = adjustExtensionForOp(OPI[0], IRB, NewVOpEltSize, NumElts);
  Value *NewVOP2 = adjustExtensionForOp(OPI[2], IRB, NewVOpEltSize, NumElts);

  // Combine both vector operands to form the vector-pair for vmpa.
  // NOTE(review): the 32 x i32 cast type matches a 128-byte HVX vector;
  // this path presumably assumes the 128B configuration — confirm.
  Intrinsic::ID VCombineIntID = Intrinsic::hexagon_V6_vcombine_128B;
  ExtF = Intrinsic::getOrInsertDeclaration(M, VCombineIntID);
  Type *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);
  NewVOP1 = IRB.CreateBitCast(NewVOP1, InType);
  NewVOP2 = IRB.CreateBitCast(NewVOP2, InType);
  Value *VecOP = IRB.CreateCall(ExtF, {NewVOP1, NewVOP2});

  // Pick the vmpa flavor by result element size: bytes*byte -> halfwords,
  // halfwords*byte -> words.
  Intrinsic::ID VmpaIntID = (NewResEltSize == 16)
                                ? Intrinsic::hexagon_V6_vmpabus_128B
                                : Intrinsic::hexagon_V6_vmpauhb_128B;
  ExtF = Intrinsic::getOrInsertDeclaration(M, VmpaIntID);
  auto *ResType =
      FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
  Value *NewIn = IRB.CreateCall(ExtF, {VecOP, ScalarOP});
  NewIn = IRB.CreateBitCast(NewIn, ResType);

  if (InstEltSize > NewResEltSize)
    // Extend the output to match the original instruction type.
    NewIn = IRB.CreateSExt(NewIn, InstTy);

  // Interleave elements in the output vector.
  SmallVector<Constant *, 16> ShuffleMask;
  unsigned HalfElts = NumElts / 2;
  for (unsigned i = 0; i < HalfElts; ++i) {
    ShuffleMask.push_back(IRB.getInt32(i));
    ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
  }
  NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
                                  ConstantVector::get(ShuffleMask));

  Inst->replaceAllUsesWith(NewIn);
  return true;
}
868
869bool HexagonGenWideningVecInstr::genSaturatingInst(Instruction *Inst) {
870 Type *InstTy = Inst->getType();
871 assert(InstTy->isVectorTy());
872 if (InstTy->getPrimitiveSizeInBits() > HwVLen)
873 return false;
874
875 using namespace PatternMatch;
876 CmpPredicate P1, P2;
877 Value *L1 = nullptr, *T1 = nullptr, *L2 = nullptr, *T2 = nullptr,
878 *L3 = nullptr;
879 Constant *RC1 = nullptr, *FC1 = nullptr, *RC2 = nullptr, *FC2 = nullptr,
880 *RC3 = nullptr;
881
882 // Pattern of interest: ashr -> llvm.smin -> llvm.smax -> trunc
883 // Match trunc instruction
885 m_Constant(RC1))))) {
886 // Match llvm.smin instruction
888 // Match ashr instruction
889 if (match(L2, m_AShr(m_Value(L3), m_Constant(RC3)))) {
890 std::pair<int, int> MinMax;
891 // get min, max values from operatands of smin and smax
892 if (getMinMax(RC1, RC2, MinMax)) {
893 bool IsResSigned;
894 // Validate the saturating vasr pattern
895 if (isSaturatingVAsr(Inst, L2, MinMax.first, MinMax.second,
896 IsResSigned)) {
897 // Get the shift value from the ashr operand
898 ConstantInt *shift_val =
899 dyn_cast<ConstantInt>(RC3->getSplatValue());
900 if (shift_val) {
901 Value *NewIn =
902 createVAsrIntrinsic(Inst, L3, shift_val, IsResSigned);
903 Inst->replaceAllUsesWith(NewIn);
904 return true;
905 }
906 }
907 }
908 }
909 }
910 }
911
912 if (!match(Inst, (m_Trunc(m_Select(m_ICmp(P1, m_Value(L1), m_Constant(RC1)),
913 m_Value(T1), m_Constant(FC1))))) ||
914 (T1 != L1 || FC1 != RC1))
915 return false;
916
917 if (!match(L1, m_Select(m_ICmp(P2, m_Value(L2), m_Constant(RC2)), m_Value(T2),
918 m_Constant(FC2))) ||
919 (T2 != L2 || FC2 != RC2))
920 return false;
921
922 if (!((P1 == CmpInst::ICMP_SGT && P2 == CmpInst::ICMP_SLT) ||
923 (P1 == CmpInst::ICMP_SLT && P2 == CmpInst::ICMP_SGT)))
924 return false;
925
926 std::pair<int, int> MinMax;
927 if ((P1 == CmpInst::ICMP_SGT) && (P2 == CmpInst::ICMP_SLT)) {
928 if (!getMinMax(RC1, RC2, MinMax))
929 return false;
930 } else if (!getMinMax(RC2, RC1, MinMax))
931 return false;
932
933 Value *S = L2; // Value being saturated
934
935 // Only AShr instructions are handled.
936 // Also, second operand to AShr must be a scalar.
937 Value *OP1 = nullptr, *ShiftByVal = nullptr;
938 if (!match(S, m_AShr(m_Value(OP1),
939 m_Shuffle(m_InsertElt(m_Poison(), m_Value(ShiftByVal),
940 m_Zero()),
941 m_Poison(), m_ZeroMask()))))
942 return false;
943
944 bool IsResSigned;
945 if (!isSaturatingVAsr(Inst, S, MinMax.first, MinMax.second, IsResSigned))
946 return false;
947
948 Value *NewIn = createVAsrIntrinsic(Inst, OP1, ShiftByVal, IsResSigned);
949 Inst->replaceAllUsesWith(NewIn);
950 return true;
951}
952
953Value *HexagonGenWideningVecInstr::extendShiftByVal(Value *ShiftByVal,
954 IRBuilder<> &IRB) {
955 using namespace PatternMatch;
956 Value *A = nullptr;
957 if (match(ShiftByVal, m_Trunc(m_Value(A))))
958 return A;
959 return IRB.CreateZExt(ShiftByVal, IRB.getInt32Ty());
960}
961
962bool HexagonGenWideningVecInstr::getMinMax(Constant *MinC, Constant *MaxC,
963 std::pair<int, int> &MinMax) {
964 Value *SplatV;
965 if (!(SplatV = MinC->getSplatValue()) || !(dyn_cast<ConstantInt>(SplatV)))
966 return false;
967 if (!(SplatV = MaxC->getSplatValue()) || !(dyn_cast<ConstantInt>(SplatV)))
968 return false;
969
970 ConstantInt *MinI = dyn_cast<ConstantInt>(MinC->getSplatValue());
971 ConstantInt *MaxI = dyn_cast<ConstantInt>(MaxC->getSplatValue());
972 MinMax = std::pair<int, int>(MinI->getSExtValue(), MaxI->getSExtValue());
973 return true;
974}
975
976bool HexagonGenWideningVecInstr::isSaturatingVAsr(Instruction *Inst, Value *S,
977 int MinV, int MaxV,
978 bool &IsResSigned) {
979 if (MinV >= MaxV)
980 return false;
981
982 IsResSigned = true;
983 Type *InstTy = Inst->getType();
984 Type *EltTy = cast<VectorType>(InstTy)->getElementType();
985 unsigned TruncSize = EltTy->getPrimitiveSizeInBits();
986
987 int MaxRange, MinRange;
988 if (MinV < 0) { // Saturate to a signed value
989 MaxRange = (1 << (TruncSize - 1)) - 1;
990 MinRange = -(1 << (TruncSize - 1));
991 } else if (MinV == 0) { // Saturate to an unsigned value
992 MaxRange = (1 << (TruncSize)) - 1;
993 MinRange = 0;
994 IsResSigned = false;
995 } else
996 return false;
997
998 if (MinV != MinRange || MaxV != MaxRange)
999 return false;
1000
1001 auto *SInst = dyn_cast<Instruction>(S);
1002 if (SInst->getOpcode() == Instruction::AShr) {
1003 Type *SInstTy = SInst->getType();
1004 Type *SEltTy = cast<VectorType>(SInstTy)->getElementType();
1005 unsigned SInstEltSize = SEltTy->getPrimitiveSizeInBits();
1006 if (SInstEltSize != 2 * TruncSize || TruncSize > 16)
1007 return false;
1008 }
1009 return true;
1010}
1011
1012Intrinsic::ID HexagonGenWideningVecInstr::getVAsrIntrinsic(bool IsInSigned,
1013 bool IsResSigned) {
1014 if (!IsResSigned)
1015 return (IsInSigned) ? Intrinsic::hexagon_vasrsat_su
1016 : Intrinsic::hexagon_vasrsat_uu;
1017 return Intrinsic::hexagon_vasrsat_ss;
1018}
1019
1020Value *HexagonGenWideningVecInstr::createVAsrIntrinsic(Instruction *Inst,
1021 Value *VecOP,
1022 Value *ShiftByVal,
1023 bool IsResSigned) {
1024 IRBuilder<> IRB(Inst);
1025 Type *ShiftByTy = ShiftByVal->getType();
1026 if (ShiftByTy->getPrimitiveSizeInBits() < 32)
1027 ShiftByVal = extendShiftByVal(ShiftByVal, IRB);
1028
1029 Type *InstTy = Inst->getType();
1030 Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
1031 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
1032 unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
1033
1034 // Replace the instruction with saturating vasr intrinsic.
1035 // Since vasr with saturation interleaves elements from both input vectors,
1036 // they must be deinterleaved for output to end up in the right order.
1037 SmallVector<Constant *, 16> ShuffleMask;
1038 unsigned HalfElts = NumElts / 2;
1039 // Even elements
1040 for (unsigned i = 0; i < HalfElts; ++i)
1041 ShuffleMask.push_back(IRB.getInt32(i * 2));
1042 // Odd elements
1043 for (unsigned i = 0; i < HalfElts; ++i)
1044 ShuffleMask.push_back(IRB.getInt32(i * 2 + 1));
1045
1046 VecOP = IRB.CreateShuffleVector(VecOP, PoisonValue::get(VecOP->getType()),
1047 ConstantVector::get(ShuffleMask));
1048
1049 auto *InVecOPTy =
1050 FixedVectorType::get(getElementTy(InstEltSize * 2, IRB), HalfElts);
1051 std::pair<Value *, Value *> HiLo = opSplit(VecOP, Inst, InVecOPTy);
1052 Intrinsic::ID IntID = getVAsrIntrinsic(true, IsResSigned);
1053 Function *F = Intrinsic::getOrInsertDeclaration(M, IntID, InVecOPTy);
1054 Value *NewIn = IRB.CreateCall(F, {HiLo.first, HiLo.second, ShiftByVal});
1055 return IRB.CreateBitCast(NewIn, InstTy);
1056}
1057
// Generate vavg instruction.
//
// Recognizes average-of-two-vectors patterns and replaces them with a
// Hexagon vavg intrinsic:
//   trunc(lshr(add(zext(a), zext(b)), 1))  -> vavgu (unsigned average)
//   trunc(lshr(add(sext(a), sext(b)), 1))  -> vavgs (signed average)
//   lshr(add(a, b), 1)                     -> vavgs (same-width operands)
// Returns true if Inst was replaced.
bool HexagonGenWideningVecInstr::genVAvg(Instruction *Inst) {
  using namespace PatternMatch;
  Type *InstTy = Inst->getType();
  assert(InstTy->isVectorTy());

  bool Match = false;
  Value *OP1 = nullptr, *OP2 = nullptr;
  bool IsSigned; // Set on every path that sets Match; only read when Match.
  // Unsigned form: (zext(a) + zext(b)) >> 1, truncated back.
  if ((Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_ZExt(m_Value(OP1)),
                                                 m_ZExt(m_Value(OP2))),
                                           m_SpecificInt(1)))))))
    IsSigned = false;
  // Signed forms. Assignment binds last, so Match receives the OR of both
  // match() calls (sext-based form or plain same-width lshr(add)).
  if (!Match &&
      (Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_SExt(m_Value(OP1)),
                                                 m_SExt(m_Value(OP2))),
                                           m_SpecificInt(1))))) ||
       match(Inst, m_LShr(m_Add(m_Value(OP1), m_Value(OP2)),
                          m_SpecificInt(1)))))
    IsSigned = true;

  if (!Match)
    return false;

  // Common element width the average is computed at: the wider of the two
  // (pre-extension) operands.
  unsigned OP1EltSize = getElementSizeInBits(OP1);
  unsigned OP2EltSize = getElementSizeInBits(OP2);
  unsigned NewEltSize = std::max(OP1EltSize, OP2EltSize);

  Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
  unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
  unsigned InstLen = InstTy->getPrimitiveSizeInBits();

  // Only vectors that are either smaller, same or twice of the hardware
  // vector length are allowed.
  if (InstEltSize < NewEltSize || (InstLen > 2 * HwVLen))
    return false;

  if ((InstLen > HwVLen) && (InstLen % HwVLen != 0))
    return false;

  IRBuilder<> IRB(Inst);
  unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
  auto *AvgInstTy =
      FixedVectorType::get(getElementTy(NewEltSize, IRB), NumElts);
  // Promote the narrower operand so both inputs share the common width,
  // using an extension matching the pattern's signedness.
  if (OP1EltSize < NewEltSize)
    OP1 = (IsSigned) ? IRB.CreateSExt(OP1, AvgInstTy)
                     : IRB.CreateZExt(OP1, AvgInstTy);
  if (OP2EltSize < NewEltSize)
    OP2 = (IsSigned) ? IRB.CreateSExt(OP2, AvgInstTy)
                     : IRB.CreateZExt(OP2, AvgInstTy);

  Intrinsic::ID AvgIntID =
      (IsSigned) ? Intrinsic::hexagon_vavgs : Intrinsic::hexagon_vavgu;
  Value *NewIn = nullptr;

  // Split operands if they need more than a vector length.
  if (NewEltSize * NumElts > HwVLen) {
    unsigned HalfElts = NumElts / 2;
    auto *ResType =
        FixedVectorType::get(getElementTy(NewEltSize, IRB), HalfElts);
    std::pair<Value *, Value *> SplitOP1 = opSplit(OP1, Inst, ResType);
    std::pair<Value *, Value *> SplitOP2 = opSplit(OP2, Inst, ResType);
    // One vavg per half.
    Value *NewHi = createIntrinsic(AvgIntID, Inst, SplitOP1.first,
                                   SplitOP2.first, ResType, NumElts, false);
    Value *NewLo = createIntrinsic(AvgIntID, Inst, SplitOP1.second,
                                   SplitOP2.second, ResType, NumElts, false);
    // Identity mask over 2*HalfElts elements concatenates the two halves.
    SmallVector<Constant *, 8> ShuffleMask;
    for (unsigned i = 0; i < NumElts; ++i)
      ShuffleMask.push_back(IRB.getInt32(i));
    // Concat Hi and Lo.
    NewIn =
        IRB.CreateShuffleVector(NewLo, NewHi, ConstantVector::get(ShuffleMask));
  } else
    NewIn =
        createIntrinsic(AvgIntID, Inst, OP1, OP2, AvgInstTy, NumElts, false);

  if (InstEltSize > NewEltSize)
    // Extend the output to match the original instruction type.
    NewIn = (IsSigned) ? IRB.CreateSExt(NewIn, InstTy)
                       : IRB.CreateZExt(NewIn, InstTy);
  Inst->replaceAllUsesWith(NewIn);
  return true;
}
1141
1142bool HexagonGenWideningVecInstr::visitBlock(BasicBlock *B) {
1143 bool Changed = false;
1144 for (auto &I : *B) {
1145 Type *InstTy = I.getType();
1146 if (!InstTy->isVectorTy() || !HST->isTypeForHVX(cast<VectorType>(InstTy)))
1147 continue;
1148
1149 unsigned InstLen = InstTy->getPrimitiveSizeInBits();
1150 if (InstLen < HwVLen && !WidenShortVector)
1151 continue;
1152
1153 Changed |= processInstructionForVMPA(&I);
1154 Changed |= genSaturatingInst(&I);
1155 Changed |= genVAvg(&I);
1156 }
1157 // Generate widening instructions.
1158 for (auto &I : *B)
1159 Changed |= processInstruction(&I);
1160 return Changed;
1161}
1162
1163bool HexagonGenWideningVecInstr::runOnFunction(Function &F) {
1164 M = F.getParent();
1165 HST = TM->getSubtargetImpl(F);
1166
1167 // Return if useHVX128BOps is not set. It can be enabled for 64B mode
1168 // but wil require some changes. For example, bitcast for intrinsics
1169 // assumes 128B mode.
1170 if (skipFunction(F) || !HST->useHVX128BOps())
1171 return false;
1172
1173 HwVLen = HST->getVectorLength() * 8; // Vector Length in bits
1174 bool Changed = false;
1175 for (auto &B : F)
1176 Changed |= visitBlock(&B);
1177
1178 return Changed;
1179}
1180
1181FunctionPass *
1183 return new HexagonGenWideningVecInstr(&TM);
1184}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
widening Hexagon generate widening vector static false bool hasNegativeValues(Constant *C)
static cl::opt< bool > WidenShortVector("hexagon-widen-short-vector", cl::desc("Generate widening instructions for short vectors."), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
#define T1
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
#define OP(OPC)
Definition Instruction.h:46
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
unsigned getVectorLength() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
const HexagonSubtarget * getSubtargetImpl(const Function &F) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2579
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2097
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition IRBuilder.h:557
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2207
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition IRBuilder.h:533
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2601
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:1708
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
void push_back(const T &Elt)
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
constexpr double e
support::detail::packed_endian_specific_integral< T, E, support::unaligned > packed
Definition SFrame.h:84
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
void initializeHexagonGenWideningVecInstrPass(PassRegistry &)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
FunctionPass * createHexagonGenWideningVecInstr(const HexagonTargetMachine &)