LLVM 22.0.0git
HexagonGenWideningVecFloatInstr.cpp
Go to the documentation of this file.
1//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Replace widening vector float operations with hexagon intrinsics.
10//
11//===----------------------------------------------------------------------===//
12//
13// Brief overview of how the GenWideningVecFloatInstr pass works.
14// This pass mirrors the already existing pass that replaces widening vector
15// integer operations with their respective intrinsics. In this pass we
16// generate Hexagon intrinsics for widening vector float instructions.
17//
18// Example1(64 vector-width widening):
19// %wide.load = load <64 x half>, <64 x half>* %0, align 2
20// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
21// %1 = fpext <64 x half> %wide.load to <64 x float>
22// %3 = fpext <64 x half> %wide.load53 to <64 x float>
23// %4 = fmul <64 x float> %1, %3
24//
25// If we run this pass on the above example, it will first find the fmul
26// instruction, and then check whether the operands of the fmul instruction
27// (%1 and %3) belong to one of these categories: [%1 ->fpext, %3 ->fpext],
28// [%1 ->fpext, %3 ->constant_vector] or [%1 ->constant_vector, %3 ->fpext].
29// If it sees such a pattern, the pass replaces it with the appropriate
30// Hexagon intrinsics.
31//
32// After replacement:
33// %wide.load = load <64 x half>, <64 x half>* %0, align 2
34// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
35// %3 = bitcast <64 x half> %wide.load to <32 x i32>
36// %4 = bitcast <64 x half> %wide.load53 to <32 x i32>
37// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4)
38// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1
39// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6)
40// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6)
41// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7)
42// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8)
43// %11 = bitcast <32 x i32> %9 to <32 x float>
44// %12 = bitcast <32 x i32> %10 to <32 x float>
45// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2
46//
47//
48//
49// Example2(128 vector-width widening):
50// %0 = bitcast half* %a to <128 x half>*
51// %wide.load = load <128 x half>, <128 x half>* %0, align 2
52// %1 = fpext <128 x half> %wide.load to <128 x float>
53// %2 = bitcast half* %b to <128 x half>*
54// %wide.load2 = load <128 x half>, <128 x half>* %2, align 2
55// %3 = fpext <128 x half> %wide.load2 to <128 x float>
56// %4 = fmul <128 x float> %1, %3
57//
58// After replacement:
59// %0 = bitcast half* %a to <128 x half>*
60// %wide.load = load <128 x half>, <128 x half>* %0, align 2
61// %1 = bitcast half* %b to <128 x half>*
62// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2
63// %2 = bitcast <128 x half> %wide.load to <64 x i32>
64// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2)
65// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2)
66// %5 = bitcast <128 x half> %wide.load2 to <64 x i32>
67// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5)
68// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5)
69// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6)
70// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1
71// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9)
72// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9)
73// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10)
74// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11)
75// %14 = bitcast <32 x i32> %12 to <32 x float>
76// %15 = bitcast <32 x i32> %13 to <32 x float>
77// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2
78// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7)
79// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1
80// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18)
81// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18)
82// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19)
83// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20)
84// %23 = bitcast <32 x i32> %21 to <32 x float>
85// %24 = bitcast <32 x i32> %22 to <32 x float>
86// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2
87// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 x i32> Mask3
88//
89//
90//===----------------------------------------------------------------------===//
92#include "llvm/ADT/APInt.h"
93#include "llvm/IR/BasicBlock.h"
94#include "llvm/IR/Constants.h"
95#include "llvm/IR/Function.h"
96#include "llvm/IR/IRBuilder.h"
97#include "llvm/IR/Instruction.h"
99#include "llvm/IR/IntrinsicsHexagon.h"
100#include "llvm/IR/PatternMatch.h"
101#include "llvm/IR/Type.h"
102#include "llvm/IR/Value.h"
104#include "llvm/Pass.h"
105#include <algorithm>
106#include <utility>
107
108using namespace llvm;
109
110namespace llvm {
114} // end namespace llvm
115
116namespace {
117
118class HexagonGenWideningVecFloatInstr : public FunctionPass {
119public:
120 static char ID;
121
122 HexagonGenWideningVecFloatInstr() : FunctionPass(ID) {
125 }
126
127 HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM)
128 : FunctionPass(ID), TM(TM) {
131 }
132
133 StringRef getPassName() const override {
134 return "Hexagon generate widening vector float instructions";
135 }
136
137 bool runOnFunction(Function &F) override;
138
139 void getAnalysisUsage(AnalysisUsage &AU) const override {
140 FunctionPass::getAnalysisUsage(AU);
141 }
142
143private:
144 Module *M = nullptr;
145 const HexagonTargetMachine *TM = nullptr;
146 const HexagonSubtarget *HST = nullptr;
147 unsigned HwVLen;
148 unsigned NumHalfEltsInFullVec;
149
150 struct OPInfo {
151 Value *OP;
152 Value *ExtInOP;
153 unsigned ExtInSize;
154 };
155
156 bool visitBlock(BasicBlock *B);
157 bool processInstruction(Instruction *Inst);
158 bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info,
159 OPInfo &OP2Info);
160
161 bool getOperandInfo(Value *V, OPInfo &OPI);
162 bool isExtendedConstant(Constant *C);
163 unsigned getElementSizeInBits(Value *V);
164 Type *getElementTy(unsigned size, IRBuilder<> &IRB);
165
166 Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
167 unsigned NewEltsize, unsigned NumElts);
168
169 std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst);
170
171 Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
172 Value *NewOP2, FixedVectorType *ResType,
173 unsigned NumElts, bool BitCastOp);
174};
175
176} // end anonymous namespace
177
178char HexagonGenWideningVecFloatInstr::ID = 0;
179
180INITIALIZE_PASS_BEGIN(HexagonGenWideningVecFloatInstr, "widening-vec-float",
181 "Hexagon generate "
182 "widening vector float instructions",
183 false, false)
185INITIALIZE_PASS_END(HexagonGenWideningVecFloatInstr, "widening-vec-float",
186 "Hexagon generate "
187 "widening vector float instructions",
189
190bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) {
191 if (Value *SplatV = C->getSplatValue()) {
192 if (auto *CFP = dyn_cast<ConstantFP>(SplatV)) {
193 bool Ignored;
194 APFloat APF = CFP->getValueAPF();
195 APFloat::opStatus sts = APF.convert(
196 APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
197 if (sts == APFloat::opStatus::opOK || sts == APFloat::opStatus::opInexact)
198 return true;
199 }
200 return false;
201 }
202 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
203 for (unsigned i = 0, e = NumElts; i != e; ++i) {
204 if (auto *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(i))) {
205 bool Ignored;
206 APFloat APF = CFP->getValueAPF();
207 APFloat::opStatus sts = APF.convert(
210 return false;
211 continue;
212 }
213 return false;
214 }
215 return true;
216}
217
218unsigned HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) {
219 Type *ValTy = V->getType();
220 Type *EltTy = ValTy;
221 if (dyn_cast<Constant>(V)) {
222 unsigned EltSize =
223 cast<VectorType>(EltTy)->getElementType()->getPrimitiveSizeInBits();
224 unsigned ReducedSize = EltSize / 2;
225
226 return ReducedSize;
227 }
228
229 if (ValTy->isVectorTy())
230 EltTy = cast<VectorType>(ValTy)->getElementType();
231 return EltTy->getPrimitiveSizeInBits();
232}
233
234bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) {
235 using namespace PatternMatch;
236 OPI.OP = V;
237 Value *ExtV = nullptr;
238 Constant *C = nullptr;
239
240 if (match(V, (m_FPExt(m_Value(ExtV)))) ||
241 match(V,
243 m_Poison(), m_ZeroMask()))) {
244
245 if (auto *ExtVType = dyn_cast<VectorType>(ExtV->getType())) {
246 // Matches the first branch.
247 if (ExtVType->getElementType()->isBFloatTy())
248 // do not confuse bf16 with ieee-fp16.
249 return false;
250 } else {
251 // Matches the second branch (insert element branch)
252 if (ExtV->getType()->isBFloatTy())
253 return false;
254 }
255
256 OPI.ExtInOP = ExtV;
257 OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
258 return true;
259 }
260
261 if (match(V, m_Constant(C))) {
262 if (!isExtendedConstant(C))
263 return false;
264 OPI.ExtInOP = C;
265 OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
266 return true;
267 }
268
269 return false;
270}
271
272Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size,
273 IRBuilder<> &IRB) {
274 switch (size) {
275 case 16:
276 return IRB.getHalfTy();
277 case 32:
278 return IRB.getFloatTy();
279 default:
280 llvm_unreachable("Unhandled Element size");
281 }
282}
283
284Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp(
285 OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) {
286 Value *V = OPI.ExtInOP;
287 unsigned EltSize = getElementSizeInBits(OPI.ExtInOP);
288 assert(NewExtSize >= EltSize);
289 Type *EltType = getElementTy(NewExtSize, IRB);
290 auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
291
292 if (auto *C = dyn_cast<Constant>(V))
293 return IRB.CreateFPTrunc(C, NewOpTy);
294
295 if (V->getType()->isVectorTy())
296 if (NewExtSize == EltSize)
297 return V;
298
299 return nullptr;
300}
301
302std::pair<Value *, Value *>
303HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) {
304 Type *InstTy = Inst->getType();
305 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
306 IRBuilder<> IRB(Inst);
307 Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
308 Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
311 if (NumElts == 128) {
312 auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
313 OP = IRB.CreateBitCast(OP, InType);
314 }
315 Value *OP1Hi = IRB.CreateCall(ExtFHi, {OP});
316 Value *OP1Lo = IRB.CreateCall(ExtFLo, {OP});
317 return std::pair<Value *, Value *>(OP1Hi, OP1Lo);
318}
319
320Value *HexagonGenWideningVecFloatInstr::createIntrinsic(
321 Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
322 FixedVectorType *ResType, unsigned NumElts, bool BitCastOp) {
323
324 IRBuilder<> IRB(Inst);
327 M, Intrinsic::hexagon_V6_vconv_sf_qf32_128B);
328 auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);
329 auto *RType = FixedVectorType::get(IRB.getFloatTy(), 32);
330
331 // Make sure inputs to vmpy intrinsic are full vectors
332 if (NumElts == NumHalfEltsInFullVec / 2) {
333 SmallVector<Constant *, 16> ConcatMask1;
334 for (unsigned i = 0; i < NumHalfEltsInFullVec; ++i)
335 ConcatMask1.push_back(IRB.getInt32(i));
336 NewOP1 =
337 IRB.CreateShuffleVector(NewOP1, PoisonValue::get(NewOP1->getType()),
338 ConstantVector::get(ConcatMask1));
339 NewOP2 =
340 IRB.CreateShuffleVector(NewOP2, PoisonValue::get(NewOP2->getType()),
341 ConstantVector::get(ConcatMask1));
342 }
343
344 if (BitCastOp) {
345 NewOP1 = IRB.CreateBitCast(NewOP1, InType);
346 NewOP2 = IRB.CreateBitCast(NewOP2, InType);
347 }
348
349 Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});
350 // Interleave the output elements to ensure correct order in Hi and Lo vectors
351 // Shuffled Mask: [0, 32, 1, 33, ..., 31, 63]
352 // Hi: [0, 1, ..., 31] and Lo: [32, 33, ..., 63]
354 unsigned HalfVecPoint = NumHalfEltsInFullVec / 2;
355 for (unsigned i = 0; i < HalfVecPoint; ++i) {
356 Mask.push_back(IRB.getInt32(i));
357 Mask.push_back(IRB.getInt32(HalfVecPoint + i));
358 }
359 NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(NewIn->getType()),
360 ConstantVector::get(Mask));
361
362 std::pair<Value *, Value *> SplitOP = opSplit(NewIn, Inst);
363 Value *ConvHi = IRB.CreateCall(ConvF, {SplitOP.first});
364 ConvHi = IRB.CreateBitCast(ConvHi, RType);
365
366 if (ResType->getNumElements() == NumHalfEltsInFullVec / 2) {
367 return ConvHi;
368 }
369
370 Value *ConvLo = IRB.CreateCall(ConvF, {SplitOP.second});
371 ConvLo = IRB.CreateBitCast(ConvLo, RType);
372
373 SmallVector<Constant *, 16> ShuffleMask;
374 for (unsigned i = 0; i < NumElts; ++i)
375 ShuffleMask.push_back(IRB.getInt32(i));
376 // Concat Hi and Lo.
377 NewIn =
378 IRB.CreateShuffleVector(ConvLo, ConvHi, ConstantVector::get(ShuffleMask));
379 return NewIn;
380}
381
382bool HexagonGenWideningVecFloatInstr::replaceWithIntrinsic(Instruction *Inst,
383 OPInfo &OP1Info,
384 OPInfo &OP2Info) {
385 Type *InstTy = Inst->getType();
386 Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
387 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
388 [[maybe_unused]] unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
389
390 unsigned MaxEltSize = OP1Info.ExtInSize;
391 unsigned NewOpEltSize = MaxEltSize;
392 unsigned NewResEltSize = 2 * MaxEltSize;
393
394 unsigned ResVLen = NewResEltSize * NumElts;
395 if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
396 return false;
397
398 Intrinsic::ID IntId = Intrinsic::hexagon_V6_vmpy_qf32_hf_128B;
399 IRBuilder<> IRB(Inst);
400 Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
401 Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);
402
403 if (NewOP1 == nullptr || NewOP2 == nullptr)
404 return false;
405
406 if (ResVLen > 2 * HwVLen) {
407 // The code written in this if block generates the widening code when
408 // vector-width is 128:
409 //
410 // Step 1: Bitcast <128 x half> type to <64 x i32>
411 // %wide.load = load <128 x half>, <128 x half>* %0 is bitcasted to,
412 // bitcast <128 x half> %wide.load to <64 x i32>
413 //
414 // Step 2: Generate Hi and Lo vectors
415 // call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %4)
416 // call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %4)
417 //
418 // Perform above 2 steps for both the operands of fmul instruction
419 //
420 // Step 3: Generate vmpy_qf32_hf multiply instruction to multiply two Hi
421 // vectors from both operands.
422 // call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%5, %8)
423 //
424 // Step 4: Convert the resultant 'qf32' output to 'sf' format
425 // %11 = shufflevector <64 x i32> %10, <64 x i32> poison, <64 x i32> Mask1
426 // %12 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %11)
427 // %13 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %11)
428 // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %12)
429 // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %13)
430 //
431 // Repeat steps 3 and 4 for mutiplication and conversion of Lo vectors.
432 // Finally merge the output values in correct sequence using shuffle
433 // vectors.
434
435 assert(ResVLen == 4 * HwVLen);
436 // Split the operands
437 unsigned HalfElts = NumElts / 2;
438 std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst);
439 std::pair<Value *, Value *> SplitOP2 = opSplit(NewOP2, Inst);
440 auto *castResType = FixedVectorType::get(IRB.getInt32Ty(), HalfElts);
441 Value *NewInHi =
442 createIntrinsic(IntId, Inst, SplitOP1.first, SplitOP2.first,
443 castResType, HalfElts, false);
444 Value *NewInLo =
445 createIntrinsic(IntId, Inst, SplitOP1.second, SplitOP2.second,
446 castResType, HalfElts, false);
447 assert(InstEltSize == NewResEltSize);
448 SmallVector<Constant *, 8> ShuffleMask;
449 for (unsigned i = 0; i < NumElts; ++i)
450 ShuffleMask.push_back(IRB.getInt32(i));
451 // Concat Hi and Lo.
452 Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
453 ConstantVector::get(ShuffleMask));
454
455 Inst->replaceAllUsesWith(NewIn);
456 return true;
457 }
458
459 auto *ResType =
460 FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
461
462 // The following widening code can only be generated in cases where
463 // input vectors are 64xhalf/32xhalf and the results are 64xfloat/32xfloat
464 // respectively.
465 if (!(NumElts == NumHalfEltsInFullVec &&
466 ResType->getNumElements() == NumHalfEltsInFullVec) &&
467 !(NumElts == NumHalfEltsInFullVec / 2 &&
468 ResType->getNumElements() == NumHalfEltsInFullVec / 2))
469 return false;
470 Value *NewIn =
471 createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);
472
473 Inst->replaceAllUsesWith(NewIn);
474 return true;
475}
476
477// Process instruction and replace them with widening vector
478// intrinsics if possible.
479bool HexagonGenWideningVecFloatInstr::processInstruction(Instruction *Inst) {
480 Type *InstTy = Inst->getType();
481 if (!InstTy->isVectorTy() ||
482 cast<FixedVectorType>(InstTy)->getNumElements() > 128)
483 return false;
484 unsigned InstLen = InstTy->getPrimitiveSizeInBits();
485 if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen)
486 return false;
487 if (InstLen < HwVLen)
488 return false;
489
490 using namespace PatternMatch;
491
492 Value *OP1 = nullptr, *OP2 = nullptr;
493 OPInfo OP1Info, OP2Info;
494
495 // Handle the case when Inst = fpext(fmul<64xhalf>(op1, op2)). The Inst can
496 // be replaced with widening multiply.
497 if (match(Inst, (m_FPExt((m_FMul(m_Value(OP1), m_Value(OP2))))))) {
498 OP1Info.ExtInOP = OP1;
499 OP1Info.ExtInSize = getElementSizeInBits(OP1);
500 OP2Info.ExtInOP = OP2;
501 OP2Info.ExtInSize = getElementSizeInBits(OP2);
502
503 if (auto *Op1Vtype = dyn_cast<VectorType>(OP1->getType())) {
504 if (!Op1Vtype->getElementType()->isHalfTy()) {
505 return false;
506 }
507 } else {
508 return false;
509 }
510
511 if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16 &&
512 getElementSizeInBits(Inst) == 32) {
513 return replaceWithIntrinsic(Inst, OP1Info, OP2Info);
514 }
515 }
516
517 if (!match(Inst, (m_FMul(m_Value(OP1), m_Value(OP2)))))
518 return false;
519
520 if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
521 return false;
522
523 if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
524 return false;
525
526 if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16) {
527 return replaceWithIntrinsic(Inst, OP1Info, OP2Info);
528 }
529
530 return false;
531}
532
533bool HexagonGenWideningVecFloatInstr::visitBlock(BasicBlock *B) {
534 bool Changed = false;
535 for (auto &I : *B)
536 Changed |= processInstruction(&I);
537 return Changed;
538}
539
540bool HexagonGenWideningVecFloatInstr::runOnFunction(Function &F) {
541 M = F.getParent();
542 HST = TM->getSubtargetImpl(F);
543
544 // Return if useHVX128BOps is not set. It can be enabled for 64B mode
545 // but wil require some changes. For example, bitcast for intrinsics
546 // assumes 128B mode.
547 if (skipFunction(F) || !HST->useHVX128BOps())
548 return false;
549
550 unsigned VecLength = HST->getVectorLength(); // Vector Length in Bytes
551 HwVLen = HST->getVectorLength() * 8; // Vector Length in bits
552 NumHalfEltsInFullVec =
553 VecLength /
554 2; // Number of half (2B) elements that fit into a full HVX vector
555 bool Changed = false;
556 for (auto &B : F)
557 Changed |= visitBlock(&B);
558
559 return Changed;
560}
561
562FunctionPass *
564 return new HexagonGenWideningVecFloatInstr(&TM);
565}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
#define OP(OPC)
Definition Instruction.h:46
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
unsigned getVectorLength() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
const HexagonSubtarget * getSubtargetImpl(const Function &F) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2165
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Definition IRBuilder.h:580
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2207
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2601
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition IRBuilder.h:590
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
void push_back(const T &Elt)
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:145
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
constexpr double e
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionPass * createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559