| File: | build/source/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp |
| Warning: | line 1560, column 21 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===- HexagonLoopIdiomRecognition.cpp ------------------------------------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | ||||
| 9 | #include "HexagonLoopIdiomRecognition.h" | |||
| 10 | #include "llvm/ADT/APInt.h" | |||
| 11 | #include "llvm/ADT/DenseMap.h" | |||
| 12 | #include "llvm/ADT/SetVector.h" | |||
| 13 | #include "llvm/ADT/SmallPtrSet.h" | |||
| 14 | #include "llvm/ADT/SmallSet.h" | |||
| 15 | #include "llvm/ADT/SmallVector.h" | |||
| 16 | #include "llvm/ADT/StringRef.h" | |||
| 17 | #include "llvm/Analysis/AliasAnalysis.h" | |||
| 18 | #include "llvm/Analysis/InstructionSimplify.h" | |||
| 19 | #include "llvm/Analysis/LoopAnalysisManager.h" | |||
| 20 | #include "llvm/Analysis/LoopInfo.h" | |||
| 21 | #include "llvm/Analysis/LoopPass.h" | |||
| 22 | #include "llvm/Analysis/MemoryLocation.h" | |||
| 23 | #include "llvm/Analysis/ScalarEvolution.h" | |||
| 24 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | |||
| 25 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
| 26 | #include "llvm/Analysis/ValueTracking.h" | |||
| 27 | #include "llvm/IR/Attributes.h" | |||
| 28 | #include "llvm/IR/BasicBlock.h" | |||
| 29 | #include "llvm/IR/Constant.h" | |||
| 30 | #include "llvm/IR/Constants.h" | |||
| 31 | #include "llvm/IR/DataLayout.h" | |||
| 32 | #include "llvm/IR/DebugLoc.h" | |||
| 33 | #include "llvm/IR/DerivedTypes.h" | |||
| 34 | #include "llvm/IR/Dominators.h" | |||
| 35 | #include "llvm/IR/Function.h" | |||
| 36 | #include "llvm/IR/IRBuilder.h" | |||
| 37 | #include "llvm/IR/InstrTypes.h" | |||
| 38 | #include "llvm/IR/Instruction.h" | |||
| 39 | #include "llvm/IR/Instructions.h" | |||
| 40 | #include "llvm/IR/IntrinsicInst.h" | |||
| 41 | #include "llvm/IR/Intrinsics.h" | |||
| 42 | #include "llvm/IR/IntrinsicsHexagon.h" | |||
| 43 | #include "llvm/IR/Module.h" | |||
| 44 | #include "llvm/IR/PassManager.h" | |||
| 45 | #include "llvm/IR/PatternMatch.h" | |||
| 46 | #include "llvm/IR/Type.h" | |||
| 47 | #include "llvm/IR/User.h" | |||
| 48 | #include "llvm/IR/Value.h" | |||
| 49 | #include "llvm/InitializePasses.h" | |||
| 50 | #include "llvm/Pass.h" | |||
| 51 | #include "llvm/Support/Casting.h" | |||
| 52 | #include "llvm/Support/CommandLine.h" | |||
| 53 | #include "llvm/Support/Compiler.h" | |||
| 54 | #include "llvm/Support/Debug.h" | |||
| 55 | #include "llvm/Support/ErrorHandling.h" | |||
| 56 | #include "llvm/Support/KnownBits.h" | |||
| 57 | #include "llvm/Support/raw_ostream.h" | |||
| 58 | #include "llvm/TargetParser/Triple.h" | |||
| 59 | #include "llvm/Transforms/Scalar.h" | |||
| 60 | #include "llvm/Transforms/Utils.h" | |||
| 61 | #include "llvm/Transforms/Utils/Local.h" | |||
| 62 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" | |||
| 63 | #include <algorithm> | |||
| 64 | #include <array> | |||
| 65 | #include <cassert> | |||
| 66 | #include <cstdint> | |||
| 67 | #include <cstdlib> | |||
| 68 | #include <deque> | |||
| 69 | #include <functional> | |||
| 70 | #include <iterator> | |||
| 71 | #include <map> | |||
| 72 | #include <set> | |||
| 73 | #include <utility> | |||
| 74 | #include <vector> | |||
| 75 | ||||
| 76 | #define DEBUG_TYPE"hexagon-lir" "hexagon-lir" | |||
| 77 | ||||
| 78 | using namespace llvm; | |||
| 79 | ||||
| 80 | static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom", | |||
| 81 | cl::Hidden, cl::init(false), | |||
| 82 | cl::desc("Disable generation of memcpy in loop idiom recognition")); | |||
| 83 | ||||
| 84 | static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom", | |||
| 85 | cl::Hidden, cl::init(false), | |||
| 86 | cl::desc("Disable generation of memmove in loop idiom recognition")); | |||
| 87 | ||||
| 88 | static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", | |||
| 89 | cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " | |||
| 90 | "check guarding the memmove.")); | |||
| 91 | ||||
| 92 | static cl::opt<unsigned> CompileTimeMemSizeThreshold( | |||
| 93 | "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), | |||
| 94 | cl::desc("Threshold (in bytes) to perform the transformation, if the " | |||
| 95 | "runtime loop count (mem transfer size) is known at compile-time.")); | |||
| 96 | ||||
| 97 | static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom", | |||
| 98 | cl::Hidden, cl::init(true), | |||
| 99 | cl::desc("Only enable generating memmove in non-nested loops")); | |||
| 100 | ||||
| 101 | static cl::opt<bool> HexagonVolatileMemcpy( | |||
| 102 | "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), | |||
| 103 | cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); | |||
| 104 | ||||
| 105 | static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000), | |||
| 106 | cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); | |||
| 107 | ||||
| 108 | static const char *HexagonVolatileMemcpyName | |||
| 109 | = "hexagon_memcpy_forward_vp4cp4n2"; | |||
| 110 | ||||
| 111 | ||||
| 112 | namespace llvm { | |||
| 113 | ||||
| 114 | void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); | |||
| 115 | Pass *createHexagonLoopIdiomPass(); | |||
| 116 | ||||
| 117 | } // end namespace llvm | |||
| 118 | ||||
| 119 | namespace { | |||
| 120 | ||||
| 121 | class HexagonLoopIdiomRecognize { | |||
| 122 | public: | |||
| 123 | explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, | |||
| 124 | LoopInfo *LF, const TargetLibraryInfo *TLI, | |||
| 125 | ScalarEvolution *SE) | |||
| 126 | : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {} | |||
| 127 | ||||
| 128 | bool run(Loop *L); | |||
| 129 | ||||
| 130 | private: | |||
| 131 | int getSCEVStride(const SCEVAddRecExpr *StoreEv); | |||
| 132 | bool isLegalStore(Loop *CurLoop, StoreInst *SI); | |||
| 133 | void collectStores(Loop *CurLoop, BasicBlock *BB, | |||
| 134 | SmallVectorImpl<StoreInst *> &Stores); | |||
| 135 | bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount); | |||
| 136 | bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const; | |||
| 137 | bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount, | |||
| 138 | SmallVectorImpl<BasicBlock *> &ExitBlocks); | |||
| 139 | bool runOnCountableLoop(Loop *L); | |||
| 140 | ||||
| 141 | AliasAnalysis *AA; | |||
| 142 | const DataLayout *DL; | |||
| 143 | DominatorTree *DT; | |||
| 144 | LoopInfo *LF; | |||
| 145 | const TargetLibraryInfo *TLI; | |||
| 146 | ScalarEvolution *SE; | |||
| 147 | bool HasMemcpy, HasMemmove; | |||
| 148 | }; | |||
| 149 | ||||
| 150 | class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass { | |||
| 151 | public: | |||
| 152 | static char ID; | |||
| 153 | ||||
| 154 | explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) { | |||
| 155 | initializeHexagonLoopIdiomRecognizeLegacyPassPass( | |||
| 156 | *PassRegistry::getPassRegistry()); | |||
| 157 | } | |||
| 158 | ||||
| 159 | StringRef getPassName() const override { | |||
| 160 | return "Recognize Hexagon-specific loop idioms"; | |||
| 161 | } | |||
| 162 | ||||
| 163 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
| 164 | AU.addRequired<LoopInfoWrapperPass>(); | |||
| 165 | AU.addRequiredID(LoopSimplifyID); | |||
| 166 | AU.addRequiredID(LCSSAID); | |||
| 167 | AU.addRequired<AAResultsWrapperPass>(); | |||
| 168 | AU.addRequired<ScalarEvolutionWrapperPass>(); | |||
| 169 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
| 170 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
| 171 | AU.addPreserved<TargetLibraryInfoWrapperPass>(); | |||
| 172 | } | |||
| 173 | ||||
| 174 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | |||
| 175 | }; | |||
| 176 | ||||
| 177 | struct Simplifier { | |||
| 178 | struct Rule { | |||
| 179 | using FuncType = std::function<Value *(Instruction *, LLVMContext &)>; | |||
| 180 | Rule(StringRef N, FuncType F) : Name(N), Fn(F) {} | |||
| 181 | StringRef Name; // For debugging. | |||
| 182 | FuncType Fn; | |||
| 183 | }; | |||
| 184 | ||||
| 185 | void addRule(StringRef N, const Rule::FuncType &F) { | |||
| 186 | Rules.push_back(Rule(N, F)); | |||
| 187 | } | |||
| 188 | ||||
| 189 | private: | |||
| 190 | struct WorkListType { | |||
| 191 | WorkListType() = default; | |||
| 192 | ||||
| 193 | void push_back(Value *V) { | |||
| 194 | // Do not push back duplicates. | |||
| 195 | if (S.insert(V).second) | |||
| 196 | Q.push_back(V); | |||
| 197 | } | |||
| 198 | ||||
| 199 | Value *pop_front_val() { | |||
| 200 | Value *V = Q.front(); | |||
| 201 | Q.pop_front(); | |||
| 202 | S.erase(V); | |||
| 203 | return V; | |||
| 204 | } | |||
| 205 | ||||
| 206 | bool empty() const { return Q.empty(); } | |||
| 207 | ||||
| 208 | private: | |||
| 209 | std::deque<Value *> Q; | |||
| 210 | std::set<Value *> S; | |||
| 211 | }; | |||
| 212 | ||||
| 213 | using ValueSetType = std::set<Value *>; | |||
| 214 | ||||
| 215 | std::vector<Rule> Rules; | |||
| 216 | ||||
| 217 | public: | |||
| 218 | struct Context { | |||
| 219 | using ValueMapType = DenseMap<Value *, Value *>; | |||
| 220 | ||||
| 221 | Value *Root; | |||
| 222 | ValueSetType Used; // The set of all cloned values used by Root. | |||
| 223 | ValueSetType Clones; // The set of all cloned values. | |||
| 224 | LLVMContext &Ctx; | |||
| 225 | ||||
| 226 | Context(Instruction *Exp) | |||
| 227 | : Ctx(Exp->getParent()->getParent()->getContext()) { | |||
| 228 | initialize(Exp); | |||
| 229 | } | |||
| 230 | ||||
| 231 | ~Context() { cleanup(); } | |||
| 232 | ||||
| 233 | void print(raw_ostream &OS, const Value *V) const; | |||
| 234 | Value *materialize(BasicBlock *B, BasicBlock::iterator At); | |||
| 235 | ||||
| 236 | private: | |||
| 237 | friend struct Simplifier; | |||
| 238 | ||||
| 239 | void initialize(Instruction *Exp); | |||
| 240 | void cleanup(); | |||
| 241 | ||||
| 242 | template <typename FuncT> void traverse(Value *V, FuncT F); | |||
| 243 | void record(Value *V); | |||
| 244 | void use(Value *V); | |||
| 245 | void unuse(Value *V); | |||
| 246 | ||||
| 247 | bool equal(const Instruction *I, const Instruction *J) const; | |||
| 248 | Value *find(Value *Tree, Value *Sub) const; | |||
| 249 | Value *subst(Value *Tree, Value *OldV, Value *NewV); | |||
| 250 | void replace(Value *OldV, Value *NewV); | |||
| 251 | void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At); | |||
| 252 | }; | |||
| 253 | ||||
| 254 | Value *simplify(Context &C); | |||
| 255 | }; | |||
| 256 | ||||
| 257 | struct PE { | |||
| 258 | PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {} | |||
| 259 | ||||
| 260 | const Simplifier::Context &C; | |||
| 261 | const Value *V; | |||
| 262 | }; | |||
| 263 | ||||
| 264 | LLVM_ATTRIBUTE_USED__attribute__((__used__)) | |||
| 265 | raw_ostream &operator<<(raw_ostream &OS, const PE &P) { | |||
| 266 | P.C.print(OS, P.V ? P.V : P.C.Root); | |||
| 267 | return OS; | |||
| 268 | } | |||
| 269 | ||||
| 270 | } // end anonymous namespace | |||
| 271 | ||||
| 272 | char HexagonLoopIdiomRecognizeLegacyPass::ID = 0; | |||
| 273 | ||||
| 274 | INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce (PassRegistry &Registry) { | |||
| 275 | "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce (PassRegistry &Registry) { | |||
| 276 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | |||
| 277 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); | |||
| 278 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); | |||
| 279 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry); | |||
| 280 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | |||
| 281 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
| 282 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
| 283 | INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms" , "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass ::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag , initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std:: ref(Registry)); } | |||
| 284 | "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms" , "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass ::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag , initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std:: ref(Registry)); } | |||
| 285 | ||||
| 286 | template <typename FuncT> | |||
| 287 | void Simplifier::Context::traverse(Value *V, FuncT F) { | |||
| 288 | WorkListType Q; | |||
| 289 | Q.push_back(V); | |||
| 290 | ||||
| 291 | while (!Q.empty()) { | |||
| 292 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
| 293 | if (!U || U->getParent()) | |||
| 294 | continue; | |||
| 295 | if (!F(U)) | |||
| 296 | continue; | |||
| 297 | for (Value *Op : U->operands()) | |||
| 298 | Q.push_back(Op); | |||
| 299 | } | |||
| 300 | } | |||
| 301 | ||||
| 302 | void Simplifier::Context::print(raw_ostream &OS, const Value *V) const { | |||
| 303 | const auto *U = dyn_cast<const Instruction>(V); | |||
| 304 | if (!U) { | |||
| 305 | OS << V << '(' << *V << ')'; | |||
| 306 | return; | |||
| 307 | } | |||
| 308 | ||||
| 309 | if (U->getParent()) { | |||
| 310 | OS << U << '('; | |||
| 311 | U->printAsOperand(OS, true); | |||
| 312 | OS << ')'; | |||
| 313 | return; | |||
| 314 | } | |||
| 315 | ||||
| 316 | unsigned N = U->getNumOperands(); | |||
| 317 | if (N != 0) | |||
| 318 | OS << U << '('; | |||
| 319 | OS << U->getOpcodeName(); | |||
| 320 | for (const Value *Op : U->operands()) { | |||
| 321 | OS << ' '; | |||
| 322 | print(OS, Op); | |||
| 323 | } | |||
| 324 | if (N != 0) | |||
| 325 | OS << ')'; | |||
| 326 | } | |||
| 327 | ||||
| 328 | void Simplifier::Context::initialize(Instruction *Exp) { | |||
| 329 | // Perform a deep clone of the expression, set Root to the root | |||
| 330 | // of the clone, and build a map from the cloned values to the | |||
| 331 | // original ones. | |||
| 332 | ValueMapType M; | |||
| 333 | BasicBlock *Block = Exp->getParent(); | |||
| 334 | WorkListType Q; | |||
| 335 | Q.push_back(Exp); | |||
| 336 | ||||
| 337 | while (!Q.empty()) { | |||
| 338 | Value *V = Q.pop_front_val(); | |||
| 339 | if (M.find(V) != M.end()) | |||
| 340 | continue; | |||
| 341 | if (Instruction *U = dyn_cast<Instruction>(V)) { | |||
| 342 | if (isa<PHINode>(U) || U->getParent() != Block) | |||
| 343 | continue; | |||
| 344 | for (Value *Op : U->operands()) | |||
| 345 | Q.push_back(Op); | |||
| 346 | M.insert({U, U->clone()}); | |||
| 347 | } | |||
| 348 | } | |||
| 349 | ||||
| 350 | for (std::pair<Value*,Value*> P : M) { | |||
| 351 | Instruction *U = cast<Instruction>(P.second); | |||
| 352 | for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { | |||
| 353 | auto F = M.find(U->getOperand(i)); | |||
| 354 | if (F != M.end()) | |||
| 355 | U->setOperand(i, F->second); | |||
| 356 | } | |||
| 357 | } | |||
| 358 | ||||
| 359 | auto R = M.find(Exp); | |||
| 360 | assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail ("R != M.end()", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 360, __extension__ __PRETTY_FUNCTION__)); | |||
| 361 | Root = R->second; | |||
| 362 | ||||
| 363 | record(Root); | |||
| 364 | use(Root); | |||
| 365 | } | |||
| 366 | ||||
| 367 | void Simplifier::Context::record(Value *V) { | |||
| 368 | auto Record = [this](Instruction *U) -> bool { | |||
| 369 | Clones.insert(U); | |||
| 370 | return true; | |||
| 371 | }; | |||
| 372 | traverse(V, Record); | |||
| 373 | } | |||
| 374 | ||||
| 375 | void Simplifier::Context::use(Value *V) { | |||
| 376 | auto Use = [this](Instruction *U) -> bool { | |||
| 377 | Used.insert(U); | |||
| 378 | return true; | |||
| 379 | }; | |||
| 380 | traverse(V, Use); | |||
| 381 | } | |||
| 382 | ||||
| 383 | void Simplifier::Context::unuse(Value *V) { | |||
| 384 | if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr) | |||
| 385 | return; | |||
| 386 | ||||
| 387 | auto Unuse = [this](Instruction *U) -> bool { | |||
| 388 | if (!U->use_empty()) | |||
| 389 | return false; | |||
| 390 | Used.erase(U); | |||
| 391 | return true; | |||
| 392 | }; | |||
| 393 | traverse(V, Unuse); | |||
| 394 | } | |||
| 395 | ||||
| 396 | Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) { | |||
| 397 | if (Tree == OldV) | |||
| 398 | return NewV; | |||
| 399 | if (OldV == NewV) | |||
| 400 | return Tree; | |||
| 401 | ||||
| 402 | WorkListType Q; | |||
| 403 | Q.push_back(Tree); | |||
| 404 | while (!Q.empty()) { | |||
| 405 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
| 406 | // If U is not an instruction, or it's not a clone, skip it. | |||
| 407 | if (!U || U->getParent()) | |||
| 408 | continue; | |||
| 409 | for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { | |||
| 410 | Value *Op = U->getOperand(i); | |||
| 411 | if (Op == OldV) { | |||
| 412 | U->setOperand(i, NewV); | |||
| 413 | unuse(OldV); | |||
| 414 | } else { | |||
| 415 | Q.push_back(Op); | |||
| 416 | } | |||
| 417 | } | |||
| 418 | } | |||
| 419 | return Tree; | |||
| 420 | } | |||
| 421 | ||||
| 422 | void Simplifier::Context::replace(Value *OldV, Value *NewV) { | |||
| 423 | if (Root == OldV) { | |||
| 424 | Root = NewV; | |||
| 425 | use(Root); | |||
| 426 | return; | |||
| 427 | } | |||
| 428 | ||||
| 429 | // NewV may be a complex tree that has just been created by one of the | |||
| 430 | // transformation rules. We need to make sure that it is commoned with | |||
| 431 | // the existing Root to the maximum extent possible. | |||
| 432 | // Identify all subtrees of NewV (including NewV itself) that have | |||
| 433 | // equivalent counterparts in Root, and replace those subtrees with | |||
| 434 | // these counterparts. | |||
| 435 | WorkListType Q; | |||
| 436 | Q.push_back(NewV); | |||
| 437 | while (!Q.empty()) { | |||
| 438 | Value *V = Q.pop_front_val(); | |||
| 439 | Instruction *U = dyn_cast<Instruction>(V); | |||
| 440 | if (!U || U->getParent()) | |||
| 441 | continue; | |||
| 442 | if (Value *DupV = find(Root, V)) { | |||
| 443 | if (DupV != V) | |||
| 444 | NewV = subst(NewV, V, DupV); | |||
| 445 | } else { | |||
| 446 | for (Value *Op : U->operands()) | |||
| 447 | Q.push_back(Op); | |||
| 448 | } | |||
| 449 | } | |||
| 450 | ||||
| 451 | // Now, simply replace OldV with NewV in Root. | |||
| 452 | Root = subst(Root, OldV, NewV); | |||
| 453 | use(Root); | |||
| 454 | } | |||
| 455 | ||||
| 456 | void Simplifier::Context::cleanup() { | |||
| 457 | for (Value *V : Clones) { | |||
| 458 | Instruction *U = cast<Instruction>(V); | |||
| 459 | if (!U->getParent()) | |||
| 460 | U->dropAllReferences(); | |||
| 461 | } | |||
| 462 | ||||
| 463 | for (Value *V : Clones) { | |||
| 464 | Instruction *U = cast<Instruction>(V); | |||
| 465 | if (!U->getParent()) | |||
| 466 | U->deleteValue(); | |||
| 467 | } | |||
| 468 | } | |||
| 469 | ||||
| 470 | bool Simplifier::Context::equal(const Instruction *I, | |||
| 471 | const Instruction *J) const { | |||
| 472 | if (I == J) | |||
| 473 | return true; | |||
| 474 | if (!I->isSameOperationAs(J)) | |||
| 475 | return false; | |||
| 476 | if (isa<PHINode>(I)) | |||
| 477 | return I->isIdenticalTo(J); | |||
| 478 | ||||
| 479 | for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) { | |||
| 480 | Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i); | |||
| 481 | if (OpI == OpJ) | |||
| 482 | continue; | |||
| 483 | auto *InI = dyn_cast<const Instruction>(OpI); | |||
| 484 | auto *InJ = dyn_cast<const Instruction>(OpJ); | |||
| 485 | if (InI && InJ) { | |||
| 486 | if (!equal(InI, InJ)) | |||
| 487 | return false; | |||
| 488 | } else if (InI != InJ || !InI) | |||
| 489 | return false; | |||
| 490 | } | |||
| 491 | return true; | |||
| 492 | } | |||
| 493 | ||||
| 494 | Value *Simplifier::Context::find(Value *Tree, Value *Sub) const { | |||
| 495 | Instruction *SubI = dyn_cast<Instruction>(Sub); | |||
| 496 | WorkListType Q; | |||
| 497 | Q.push_back(Tree); | |||
| 498 | ||||
| 499 | while (!Q.empty()) { | |||
| 500 | Value *V = Q.pop_front_val(); | |||
| 501 | if (V == Sub) | |||
| 502 | return V; | |||
| 503 | Instruction *U = dyn_cast<Instruction>(V); | |||
| 504 | if (!U || U->getParent()) | |||
| 505 | continue; | |||
| 506 | if (SubI && equal(SubI, U)) | |||
| 507 | return U; | |||
| 508 | assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0) : __assert_fail ("!isa<PHINode>(U)", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 508, __extension__ __PRETTY_FUNCTION__)); | |||
| 509 | for (Value *Op : U->operands()) | |||
| 510 | Q.push_back(Op); | |||
| 511 | } | |||
| 512 | return nullptr; | |||
| 513 | } | |||
| 514 | ||||
| 515 | void Simplifier::Context::link(Instruction *I, BasicBlock *B, | |||
| 516 | BasicBlock::iterator At) { | |||
| 517 | if (I->getParent()) | |||
| 518 | return; | |||
| 519 | ||||
| 520 | for (Value *Op : I->operands()) { | |||
| 521 | if (Instruction *OpI = dyn_cast<Instruction>(Op)) | |||
| 522 | link(OpI, B, At); | |||
| 523 | } | |||
| 524 | ||||
| 525 | I->insertInto(B, At); | |||
| 526 | } | |||
| 527 | ||||
| 528 | Value *Simplifier::Context::materialize(BasicBlock *B, | |||
| 529 | BasicBlock::iterator At) { | |||
| 530 | if (Instruction *RootI = dyn_cast<Instruction>(Root)) | |||
| 531 | link(RootI, B, At); | |||
| 532 | return Root; | |||
| 533 | } | |||
| 534 | ||||
| 535 | Value *Simplifier::simplify(Context &C) { | |||
| 536 | WorkListType Q; | |||
| 537 | Q.push_back(C.Root); | |||
| 538 | unsigned Count = 0; | |||
| 539 | const unsigned Limit = SimplifyLimit; | |||
| 540 | ||||
| 541 | while (!Q.empty()) { | |||
| 542 | if (Count++ >= Limit) | |||
| 543 | break; | |||
| 544 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
| 545 | if (!U || U->getParent() || !C.Used.count(U)) | |||
| 546 | continue; | |||
| 547 | bool Changed = false; | |||
| 548 | for (Rule &R : Rules) { | |||
| 549 | Value *W = R.Fn(U, C.Ctx); | |||
| 550 | if (!W) | |||
| 551 | continue; | |||
| 552 | Changed = true; | |||
| 553 | C.record(W); | |||
| 554 | C.replace(U, W); | |||
| 555 | Q.push_back(C.Root); | |||
| 556 | break; | |||
| 557 | } | |||
| 558 | if (!Changed) { | |||
| 559 | for (Value *Op : U->operands()) | |||
| 560 | Q.push_back(Op); | |||
| 561 | } | |||
| 562 | } | |||
| 563 | return Count < Limit ? C.Root : nullptr; | |||
| 564 | } | |||
| 565 | ||||
| 566 | //===----------------------------------------------------------------------===// | |||
| 567 | // | |||
| 568 | // Implementation of PolynomialMultiplyRecognize | |||
| 569 | // | |||
| 570 | //===----------------------------------------------------------------------===// | |||
| 571 | ||||
| 572 | namespace { | |||
| 573 | ||||
| 574 | class PolynomialMultiplyRecognize { | |||
| 575 | public: | |||
| 576 | explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl, | |||
| 577 | const DominatorTree &dt, const TargetLibraryInfo &tli, | |||
| 578 | ScalarEvolution &se) | |||
| 579 | : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {} | |||
| 580 | ||||
| 581 | bool recognize(); | |||
| 582 | ||||
| 583 | private: | |||
| 584 | using ValueSeq = SetVector<Value *>; | |||
| 585 | ||||
| 586 | IntegerType *getPmpyType() const { | |||
| 587 | LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext(); | |||
| 588 | return IntegerType::get(Ctx, 32); | |||
| 589 | } | |||
| 590 | ||||
| 591 | bool isPromotableTo(Value *V, IntegerType *Ty); | |||
| 592 | void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB); | |||
| 593 | bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB); | |||
| 594 | ||||
| 595 | Value *getCountIV(BasicBlock *BB); | |||
| 596 | bool findCycle(Value *Out, Value *In, ValueSeq &Cycle); | |||
| 597 | void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early, | |||
| 598 | ValueSeq &Late); | |||
| 599 | bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late); | |||
| 600 | bool commutesWithShift(Instruction *I); | |||
| 601 | bool highBitsAreZero(Value *V, unsigned IterCount); | |||
| 602 | bool keepsHighBitsZero(Value *V, unsigned IterCount); | |||
| 603 | bool isOperandShifted(Instruction *I, Value *Op); | |||
| 604 | bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB, | |||
| 605 | unsigned IterCount); | |||
| 606 | void cleanupLoopBody(BasicBlock *LoopB); | |||
| 607 | ||||
| 608 | struct ParsedValues { | |||
| 609 | ParsedValues() = default; | |||
| 610 | ||||
| 611 | Value *M = nullptr; | |||
| 612 | Value *P = nullptr; | |||
| 613 | Value *Q = nullptr; | |||
| 614 | Value *R = nullptr; | |||
| 615 | Value *X = nullptr; | |||
| 616 | Instruction *Res = nullptr; | |||
| 617 | unsigned IterCount = 0; | |||
| 618 | bool Left = false; | |||
| 619 | bool Inv = false; | |||
| 620 | }; | |||
| 621 | ||||
| 622 | bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV); | |||
| 623 | bool matchRightShift(SelectInst *SelI, ParsedValues &PV); | |||
| 624 | bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB, | |||
| 625 | Value *CIV, ParsedValues &PV, bool PreScan); | |||
| 626 | unsigned getInverseMxN(unsigned QP); | |||
| 627 | Value *generate(BasicBlock::iterator At, ParsedValues &PV); | |||
| 628 | ||||
| 629 | void setupPreSimplifier(Simplifier &S); | |||
| 630 | void setupPostSimplifier(Simplifier &S); | |||
| 631 | ||||
| 632 | Loop *CurLoop; | |||
| 633 | const DataLayout &DL; | |||
| 634 | const DominatorTree &DT; | |||
| 635 | const TargetLibraryInfo &TLI; | |||
| 636 | ScalarEvolution &SE; | |||
| 637 | }; | |||
| 638 | ||||
| 639 | } // end anonymous namespace | |||
| 640 | ||||
| 641 | Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { | |||
| 642 | pred_iterator PI = pred_begin(BB), PE = pred_end(BB); | |||
| 643 | if (std::distance(PI, PE) != 2) | |||
| 644 | return nullptr; | |||
| 645 | BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI; | |||
| 646 | ||||
| 647 | for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { | |||
| 648 | auto *PN = cast<PHINode>(I); | |||
| 649 | Value *InitV = PN->getIncomingValueForBlock(PB); | |||
| 650 | if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero()) | |||
| 651 | continue; | |||
| 652 | Value *IterV = PN->getIncomingValueForBlock(BB); | |||
| 653 | auto *BO = dyn_cast<BinaryOperator>(IterV); | |||
| 654 | if (!BO) | |||
| 655 | continue; | |||
| 656 | if (BO->getOpcode() != Instruction::Add) | |||
| 657 | continue; | |||
| 658 | Value *IncV = nullptr; | |||
| 659 | if (BO->getOperand(0) == PN) | |||
| 660 | IncV = BO->getOperand(1); | |||
| 661 | else if (BO->getOperand(1) == PN) | |||
| 662 | IncV = BO->getOperand(0); | |||
| 663 | if (IncV == nullptr) | |||
| 664 | continue; | |||
| 665 | ||||
| 666 | if (auto *T = dyn_cast<ConstantInt>(IncV)) | |||
| 667 | if (T->getZExtValue() == 1) | |||
| 668 | return PN; | |||
| 669 | } | |||
| 670 | return nullptr; | |||
| 671 | } | |||
| 672 | ||||
| 673 | static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) { | |||
| 674 | for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { | |||
| 675 | Use &TheUse = UI.getUse(); | |||
| 676 | ++UI; | |||
| 677 | if (auto *II = dyn_cast<Instruction>(TheUse.getUser())) | |||
| 678 | if (BB == II->getParent()) | |||
| 679 | II->replaceUsesOfWith(I, J); | |||
| 680 | } | |||
| 681 | } | |||
| 682 | ||||
| 683 | bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI, | |||
| 684 | Value *CIV, ParsedValues &PV) { | |||
| 685 | // Match the following: | |||
| 686 | // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R | |||
| 687 | // select (X & (1 << i)) == 0 ? R : R ^ (Q << i) | |||
| 688 | // The condition may also check for equality with the masked value, i.e | |||
| 689 | // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R | |||
| 690 | // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i); | |||
| 691 | ||||
| 692 | Value *CondV = SelI->getCondition(); | |||
| 693 | Value *TrueV = SelI->getTrueValue(); | |||
| 694 | Value *FalseV = SelI->getFalseValue(); | |||
| 695 | ||||
| 696 | using namespace PatternMatch; | |||
| 697 | ||||
| 698 | CmpInst::Predicate P; | |||
| 699 | Value *A = nullptr, *B = nullptr, *C = nullptr; | |||
| 700 | ||||
| 701 | if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) && | |||
| 702 | !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B))))) | |||
| 703 | return false; | |||
| 704 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
| 705 | return false; | |||
| 706 | // Matched: select (A & B) == C ? ... : ... | |||
| 707 | // select (A & B) != C ? ... : ... | |||
| 708 | ||||
| 709 | Value *X = nullptr, *Sh1 = nullptr; | |||
| 710 | // Check (A & B) for (X & (1 << i)): | |||
| 711 | if (match(A, m_Shl(m_One(), m_Specific(CIV)))) { | |||
| 712 | Sh1 = A; | |||
| 713 | X = B; | |||
| 714 | } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) { | |||
| 715 | Sh1 = B; | |||
| 716 | X = A; | |||
| 717 | } else { | |||
| 718 | // TODO: Could also check for an induction variable containing single | |||
| 719 | // bit shifted left by 1 in each iteration. | |||
| 720 | return false; | |||
| 721 | } | |||
| 722 | ||||
| 723 | bool TrueIfZero; | |||
| 724 | ||||
| 725 | // Check C against the possible values for comparison: 0 and (1 << i): | |||
| 726 | if (match(C, m_Zero())) | |||
| 727 | TrueIfZero = (P == CmpInst::ICMP_EQ); | |||
| 728 | else if (C == Sh1) | |||
| 729 | TrueIfZero = (P == CmpInst::ICMP_NE); | |||
| 730 | else | |||
| 731 | return false; | |||
| 732 | ||||
| 733 | // So far, matched: | |||
| 734 | // select (X & (1 << i)) ? ... : ... | |||
| 735 | // including variations of the check against zero/non-zero value. | |||
| 736 | ||||
| 737 | Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr; | |||
| 738 | if (TrueIfZero) { | |||
| 739 | ShouldSameV = TrueV; | |||
| 740 | ShouldXoredV = FalseV; | |||
| 741 | } else { | |||
| 742 | ShouldSameV = FalseV; | |||
| 743 | ShouldXoredV = TrueV; | |||
| 744 | } | |||
| 745 | ||||
| 746 | Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr; | |||
| 747 | Value *T = nullptr; | |||
| 748 | if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) { | |||
| 749 | // Matched: select +++ ? ... : Y ^ Z | |||
| 750 | // select +++ ? Y ^ Z : ... | |||
| 751 | // where +++ denotes previously checked matches. | |||
| 752 | if (ShouldSameV == Y) | |||
| 753 | T = Z; | |||
| 754 | else if (ShouldSameV == Z) | |||
| 755 | T = Y; | |||
| 756 | else | |||
| 757 | return false; | |||
| 758 | R = ShouldSameV; | |||
| 759 | // Matched: select +++ ? R : R ^ T | |||
| 760 | // select +++ ? R ^ T : R | |||
| 761 | // depending on TrueIfZero. | |||
| 762 | ||||
| 763 | } else if (match(ShouldSameV, m_Zero())) { | |||
| 764 | // Matched: select +++ ? 0 : ... | |||
| 765 | // select +++ ? ... : 0 | |||
| 766 | if (!SelI->hasOneUse()) | |||
| 767 | return false; | |||
| 768 | T = ShouldXoredV; | |||
| 769 | // Matched: select +++ ? 0 : T | |||
| 770 | // select +++ ? T : 0 | |||
| 771 | ||||
| 772 | Value *U = *SelI->user_begin(); | |||
| 773 | if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) && | |||
| 774 | !match(U, m_Xor(m_Value(R), m_Specific(SelI)))) | |||
| 775 | return false; | |||
| 776 | // Matched: xor (select +++ ? 0 : T), R | |||
| 777 | // xor (select +++ ? T : 0), R | |||
| 778 | } else | |||
| 779 | return false; | |||
| 780 | ||||
| 781 | // The xor input value T is isolated into its own match so that it could | |||
| 782 | // be checked against an induction variable containing a shifted bit | |||
| 783 | // (todo). | |||
| 784 | // For now, check against (Q << i). | |||
| 785 | if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) && | |||
| 786 | !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV))))) | |||
| 787 | return false; | |||
| 788 | // Matched: select +++ ? R : R ^ (Q << i) | |||
| 789 | // select +++ ? R ^ (Q << i) : R | |||
| 790 | ||||
| 791 | PV.X = X; | |||
| 792 | PV.Q = Q; | |||
| 793 | PV.R = R; | |||
| 794 | PV.Left = true; | |||
| 795 | return true; | |||
| 796 | } | |||
| 797 | ||||
| 798 | bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI, | |||
| 799 | ParsedValues &PV) { | |||
| 800 | // Match the following: | |||
| 801 | // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1) | |||
| 802 | // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q | |||
| 803 | // The condition may also check for equality with the masked value, i.e | |||
| 804 | // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1) | |||
| 805 | // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q | |||
| 806 | ||||
| 807 | Value *CondV = SelI->getCondition(); | |||
| 808 | Value *TrueV = SelI->getTrueValue(); | |||
| 809 | Value *FalseV = SelI->getFalseValue(); | |||
| 810 | ||||
| 811 | using namespace PatternMatch; | |||
| 812 | ||||
| 813 | Value *C = nullptr; | |||
| 814 | CmpInst::Predicate P; | |||
| 815 | bool TrueIfZero; | |||
| 816 | ||||
| 817 | if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) || | |||
| 818 | match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) { | |||
| 819 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
| 820 | return false; | |||
| 821 | // Matched: select C == 0 ? ... : ... | |||
| 822 | // select C != 0 ? ... : ... | |||
| 823 | TrueIfZero = (P == CmpInst::ICMP_EQ); | |||
| 824 | } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) || | |||
| 825 | match(CondV, m_ICmp(P, m_One(), m_Value(C)))) { | |||
| 826 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
| 827 | return false; | |||
| 828 | // Matched: select C == 1 ? ... : ... | |||
| 829 | // select C != 1 ? ... : ... | |||
| 830 | TrueIfZero = (P == CmpInst::ICMP_NE); | |||
| 831 | } else | |||
| 832 | return false; | |||
| 833 | ||||
| 834 | Value *X = nullptr; | |||
| 835 | if (!match(C, m_And(m_Value(X), m_One())) && | |||
| 836 | !match(C, m_And(m_One(), m_Value(X)))) | |||
| 837 | return false; | |||
| 838 | // Matched: select (X & 1) == +++ ? ... : ... | |||
| 839 | // select (X & 1) != +++ ? ... : ... | |||
| 840 | ||||
| 841 | Value *R = nullptr, *Q = nullptr; | |||
| 842 | if (TrueIfZero) { | |||
| 843 | // The select's condition is true if the tested bit is 0. | |||
| 844 | // TrueV must be the shift, FalseV must be the xor. | |||
| 845 | if (!match(TrueV, m_LShr(m_Value(R), m_One()))) | |||
| 846 | return false; | |||
| 847 | // Matched: select +++ ? (R >> 1) : ... | |||
| 848 | if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) && | |||
| 849 | !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV)))) | |||
| 850 | return false; | |||
| 851 | // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q | |||
| 852 | // with commuting ^. | |||
| 853 | } else { | |||
| 854 | // The select's condition is true if the tested bit is 1. | |||
| 855 | // TrueV must be the xor, FalseV must be the shift. | |||
| 856 | if (!match(FalseV, m_LShr(m_Value(R), m_One()))) | |||
| 857 | return false; | |||
| 858 | // Matched: select +++ ? ... : (R >> 1) | |||
| 859 | if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) && | |||
| 860 | !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV)))) | |||
| 861 | return false; | |||
| 862 | // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1) | |||
| 863 | // with commuting ^. | |||
| 864 | } | |||
| 865 | ||||
| 866 | PV.X = X; | |||
| 867 | PV.Q = Q; | |||
| 868 | PV.R = R; | |||
| 869 | PV.Left = false; | |||
| 870 | return true; | |||
| 871 | } | |||
| 872 | ||||
| 873 | bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, | |||
| 874 | BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV, | |||
| 875 | bool PreScan) { | |||
| 876 | using namespace PatternMatch; | |||
| 877 | ||||
| 878 | // The basic pattern for R = P.Q is: | |||
| 879 | // for i = 0..31 | |||
| 880 | // R = phi (0, R') | |||
| 881 | // if (P & (1 << i)) ; test-bit(P, i) | |||
| 882 | // R' = R ^ (Q << i) | |||
| 883 | // | |||
| 884 | // Similarly, the basic pattern for R = (P/Q).Q - P | |||
| 885 | // for i = 0..31 | |||
| 886 | // R = phi(P, R') | |||
| 887 | // if (R & (1 << i)) | |||
| 888 | // R' = R ^ (Q << i) | |||
| 889 | ||||
| 890 | // There exist idioms, where instead of Q being shifted left, P is shifted | |||
| 891 | // right. This produces a result that is shifted right by 32 bits (the | |||
| 892 | // non-shifted result is 64-bit). | |||
| 893 | // | |||
| 894 | // For R = P.Q, this would be: | |||
| 895 | // for i = 0..31 | |||
| 896 | // R = phi (0, R') | |||
| 897 | // if ((P >> i) & 1) | |||
| 898 | // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must | |||
| 899 | // else ; be shifted by 1, not i. | |||
| 900 | // R' = R >> 1 | |||
| 901 | // | |||
| 902 | // And for the inverse: | |||
| 903 | // for i = 0..31 | |||
| 904 | // R = phi (P, R') | |||
| 905 | // if (R & 1) | |||
| 906 | // R' = (R >> 1) ^ Q | |||
| 907 | // else | |||
| 908 | // R' = R >> 1 | |||
| 909 | ||||
| 910 | // The left-shifting idioms share the same pattern: | |||
| 911 | // select (X & (1 << i)) ? R ^ (Q << i) : R | |||
| 912 | // Similarly for right-shifting idioms: | |||
| 913 | // select (X & 1) ? (R >> 1) ^ Q | |||
| 914 | ||||
| 915 | if (matchLeftShift(SelI, CIV, PV)) { | |||
| 916 | // If this is a pre-scan, getting this far is sufficient. | |||
| 917 | if (PreScan) | |||
| 918 | return true; | |||
| 919 | ||||
| 920 | // Need to make sure that the SelI goes back into R. | |||
| 921 | auto *RPhi = dyn_cast<PHINode>(PV.R); | |||
| 922 | if (!RPhi) | |||
| 923 | return false; | |||
| 924 | if (SelI != RPhi->getIncomingValueForBlock(LoopB)) | |||
| 925 | return false; | |||
| 926 | PV.Res = SelI; | |||
| 927 | ||||
| 928 | // If X is loop invariant, it must be the input polynomial, and the | |||
| 929 | // idiom is the basic polynomial multiply. | |||
| 930 | if (CurLoop->isLoopInvariant(PV.X)) { | |||
| 931 | PV.P = PV.X; | |||
| 932 | PV.Inv = false; | |||
| 933 | } else { | |||
| 934 | // X is not loop invariant. If X == R, this is the inverse pmpy. | |||
| 935 | // Otherwise, check for an xor with an invariant value. If the | |||
| 936 | // variable argument to the xor is R, then this is still a valid | |||
| 937 | // inverse pmpy. | |||
| 938 | PV.Inv = true; | |||
| 939 | if (PV.X != PV.R) { | |||
| 940 | Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr; | |||
| 941 | if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2)))) | |||
| 942 | return false; | |||
| 943 | auto *I1 = dyn_cast<Instruction>(X1); | |||
| 944 | auto *I2 = dyn_cast<Instruction>(X2); | |||
| 945 | if (!I1 || I1->getParent() != LoopB) { | |||
| 946 | Var = X2; | |||
| 947 | Inv = X1; | |||
| 948 | } else if (!I2 || I2->getParent() != LoopB) { | |||
| 949 | Var = X1; | |||
| 950 | Inv = X2; | |||
| 951 | } else | |||
| 952 | return false; | |||
| 953 | if (Var != PV.R) | |||
| 954 | return false; | |||
| 955 | PV.M = Inv; | |||
| 956 | } | |||
| 957 | // The input polynomial P still needs to be determined. It will be | |||
| 958 | // the entry value of R. | |||
| 959 | Value *EntryP = RPhi->getIncomingValueForBlock(PrehB); | |||
| 960 | PV.P = EntryP; | |||
| 961 | } | |||
| 962 | ||||
| 963 | return true; | |||
| 964 | } | |||
| 965 | ||||
| 966 | if (matchRightShift(SelI, PV)) { | |||
| 967 | // If this is an inverse pattern, the Q polynomial must be known at | |||
| 968 | // compile time. | |||
| 969 | if (PV.Inv && !isa<ConstantInt>(PV.Q)) | |||
| 970 | return false; | |||
| 971 | if (PreScan) | |||
| 972 | return true; | |||
| 973 | // There is no exact matching of right-shift pmpy. | |||
| 974 | return false; | |||
| 975 | } | |||
| 976 | ||||
| 977 | return false; | |||
| 978 | } | |||
| 979 | ||||
| 980 | bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, | |||
| 981 | IntegerType *DestTy) { | |||
| 982 | IntegerType *T = dyn_cast<IntegerType>(Val->getType()); | |||
| 983 | if (!T || T->getBitWidth() > DestTy->getBitWidth()) | |||
| 984 | return false; | |||
| 985 | if (T->getBitWidth() == DestTy->getBitWidth()) | |||
| 986 | return true; | |||
| 987 | // Non-instructions are promotable. The reason why an instruction may not | |||
| 988 | // be promotable is that it may produce a different result if its operands | |||
| 989 | // and the result are promoted, for example, it may produce more non-zero | |||
| 990 | // bits. While it would still be possible to represent the proper result | |||
| 991 | // in a wider type, it may require adding additional instructions (which | |||
| 992 | // we don't want to do). | |||
| 993 | Instruction *In = dyn_cast<Instruction>(Val); | |||
| 994 | if (!In) | |||
| 995 | return true; | |||
| 996 | // The bitwidth of the source type is smaller than the destination. | |||
| 997 | // Check if the individual operation can be promoted. | |||
| 998 | switch (In->getOpcode()) { | |||
| 999 | case Instruction::PHI: | |||
| 1000 | case Instruction::ZExt: | |||
| 1001 | case Instruction::And: | |||
| 1002 | case Instruction::Or: | |||
| 1003 | case Instruction::Xor: | |||
| 1004 | case Instruction::LShr: // Shift right is ok. | |||
| 1005 | case Instruction::Select: | |||
| 1006 | case Instruction::Trunc: | |||
| 1007 | return true; | |||
| 1008 | case Instruction::ICmp: | |||
| 1009 | if (CmpInst *CI = cast<CmpInst>(In)) | |||
| 1010 | return CI->isEquality() || CI->isUnsigned(); | |||
| 1011 | llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1011 ); | |||
| 1012 | case Instruction::Add: | |||
| 1013 | return In->hasNoSignedWrap() && In->hasNoUnsignedWrap(); | |||
| 1014 | } | |||
| 1015 | return false; | |||
| 1016 | } | |||
| 1017 | ||||
| 1018 | void PolynomialMultiplyRecognize::promoteTo(Instruction *In, | |||
| 1019 | IntegerType *DestTy, BasicBlock *LoopB) { | |||
| 1020 | Type *OrigTy = In->getType(); | |||
| 1021 | assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote" ) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1021 , __extension__ __PRETTY_FUNCTION__)); | |||
| 1022 | ||||
| 1023 | // Leave boolean values alone. | |||
| 1024 | if (!In->getType()->isIntegerTy(1)) | |||
| 1025 | In->mutateType(DestTy); | |||
| 1026 | unsigned DestBW = DestTy->getBitWidth(); | |||
| 1027 | ||||
| 1028 | // Handle PHIs. | |||
| 1029 | if (PHINode *P = dyn_cast<PHINode>(In)) { | |||
| 1030 | unsigned N = P->getNumIncomingValues(); | |||
| 1031 | for (unsigned i = 0; i != N; ++i) { | |||
| 1032 | BasicBlock *InB = P->getIncomingBlock(i); | |||
| 1033 | if (InB == LoopB) | |||
| 1034 | continue; | |||
| 1035 | Value *InV = P->getIncomingValue(i); | |||
| 1036 | IntegerType *Ty = cast<IntegerType>(InV->getType()); | |||
| 1037 | // Do not promote values in PHI nodes of type i1. | |||
| 1038 | if (Ty != P->getType()) { | |||
| 1039 | // If the value type does not match the PHI type, the PHI type | |||
| 1040 | // must have been promoted. | |||
| 1041 | assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ? void (0) : __assert_fail ("Ty->getBitWidth() < DestBW" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1041 , __extension__ __PRETTY_FUNCTION__)); | |||
| 1042 | InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy); | |||
| 1043 | P->setIncomingValue(i, InV); | |||
| 1044 | } | |||
| 1045 | } | |||
| 1046 | } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) { | |||
| 1047 | Value *Op = Z->getOperand(0); | |||
| 1048 | if (Op->getType() == Z->getType()) | |||
| 1049 | Z->replaceAllUsesWith(Op); | |||
| 1050 | Z->eraseFromParent(); | |||
| 1051 | return; | |||
| 1052 | } | |||
| 1053 | if (TruncInst *T = dyn_cast<TruncInst>(In)) { | |||
| 1054 | IntegerType *TruncTy = cast<IntegerType>(OrigTy); | |||
| 1055 | Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1); | |||
| 1056 | Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask); | |||
| 1057 | T->replaceAllUsesWith(And); | |||
| 1058 | T->eraseFromParent(); | |||
| 1059 | return; | |||
| 1060 | } | |||
| 1061 | ||||
| 1062 | // Promote immediates. | |||
| 1063 | for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { | |||
| 1064 | if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i))) | |||
| 1065 | if (CI->getType()->getBitWidth() < DestBW) | |||
| 1066 | In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); | |||
| 1067 | } | |||
| 1068 | } | |||
| 1069 | ||||
| 1070 | bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, | |||
| 1071 | BasicBlock *ExitB) { | |||
| 1072 | assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail ( "LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1072, __extension__ __PRETTY_FUNCTION__)); | |||
| 1073 | // Skip loops where the exit block has more than one predecessor. The values | |||
| 1074 | // coming from the loop block will be promoted to another type, and so the | |||
| 1075 | // values coming into the exit block from other predecessors would also have | |||
| 1076 | // to be promoted. | |||
| 1077 | if (!ExitB || (ExitB->getSinglePredecessor() != LoopB)) | |||
| 1078 | return false; | |||
| 1079 | IntegerType *DestTy = getPmpyType(); | |||
| 1080 | // Check if the exit values have types that are no wider than the type | |||
| 1081 | // that we want to promote to. | |||
| 1082 | unsigned DestBW = DestTy->getBitWidth(); | |||
| 1083 | for (PHINode &P : ExitB->phis()) { | |||
| 1084 | if (P.getNumIncomingValues() != 1) | |||
| 1085 | return false; | |||
| 1086 | assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void (0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1086, __extension__ __PRETTY_FUNCTION__)); | |||
| 1087 | IntegerType *T = dyn_cast<IntegerType>(P.getType()); | |||
| 1088 | if (!T || T->getBitWidth() > DestBW) | |||
| 1089 | return false; | |||
| 1090 | } | |||
| 1091 | ||||
| 1092 | // Check all instructions in the loop. | |||
| 1093 | for (Instruction &In : *LoopB) | |||
| 1094 | if (!In.isTerminator() && !isPromotableTo(&In, DestTy)) | |||
| 1095 | return false; | |||
| 1096 | ||||
| 1097 | // Perform the promotion. | |||
| 1098 | std::vector<Instruction*> LoopIns; | |||
| 1099 | std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns), | |||
| 1100 | [](Instruction &In) { return &In; }); | |||
| 1101 | for (Instruction *In : LoopIns) | |||
| 1102 | if (!In->isTerminator()) | |||
| 1103 | promoteTo(In, DestTy, LoopB); | |||
| 1104 | ||||
| 1105 | // Fix up the PHI nodes in the exit block. | |||
| 1106 | Instruction *EndI = ExitB->getFirstNonPHI(); | |||
| 1107 | BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end(); | |||
| 1108 | for (auto I = ExitB->begin(); I != End; ++I) { | |||
| 1109 | PHINode *P = dyn_cast<PHINode>(I); | |||
| 1110 | if (!P) | |||
| 1111 | break; | |||
| 1112 | Type *Ty0 = P->getIncomingValue(0)->getType(); | |||
| 1113 | Type *PTy = P->getType(); | |||
| 1114 | if (PTy != Ty0) { | |||
| 1115 | assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail ("Ty0 == DestTy", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1115, __extension__ __PRETTY_FUNCTION__)); | |||
| 1116 | // In order to create the trunc, P must have the promoted type. | |||
| 1117 | P->mutateType(Ty0); | |||
| 1118 | Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy); | |||
| 1119 | // In order for the RAUW to work, the types of P and T must match. | |||
| 1120 | P->mutateType(PTy); | |||
| 1121 | P->replaceAllUsesWith(T); | |||
| 1122 | // Final update of the P's type. | |||
| 1123 | P->mutateType(Ty0); | |||
| 1124 | cast<Instruction>(T)->setOperand(0, P); | |||
| 1125 | } | |||
| 1126 | } | |||
| 1127 | ||||
| 1128 | return true; | |||
| 1129 | } | |||
| 1130 | ||||
| 1131 | bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, | |||
| 1132 | ValueSeq &Cycle) { | |||
| 1133 | // Out = ..., In, ... | |||
| 1134 | if (Out == In) | |||
| 1135 | return true; | |||
| 1136 | ||||
| 1137 | auto *BB = cast<Instruction>(Out)->getParent(); | |||
| 1138 | bool HadPhi = false; | |||
| 1139 | ||||
| 1140 | for (auto *U : Out->users()) { | |||
| 1141 | auto *I = dyn_cast<Instruction>(&*U); | |||
| 1142 | if (I == nullptr || I->getParent() != BB) | |||
| 1143 | continue; | |||
| 1144 | // Make sure that there are no multi-iteration cycles, e.g. | |||
| 1145 | // p1 = phi(p2) | |||
| 1146 | // p2 = phi(p1) | |||
| 1147 | // The cycle p1->p2->p1 would span two loop iterations. | |||
| 1148 | // Check that there is only one phi in the cycle. | |||
| 1149 | bool IsPhi = isa<PHINode>(I); | |||
| 1150 | if (IsPhi && HadPhi) | |||
| 1151 | return false; | |||
| 1152 | HadPhi |= IsPhi; | |||
| 1153 | if (!Cycle.insert(I)) | |||
| 1154 | return false; | |||
| 1155 | if (findCycle(I, In, Cycle)) | |||
| 1156 | break; | |||
| 1157 | Cycle.remove(I); | |||
| 1158 | } | |||
| 1159 | return !Cycle.empty(); | |||
| 1160 | } | |||
| 1161 | ||||
| 1162 | void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI, | |||
| 1163 | ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) { | |||
| 1164 | // All the values in the cycle that are between the phi node and the | |||
| 1165 | // divider instruction will be classified as "early", all other values | |||
| 1166 | // will be "late". | |||
| 1167 | ||||
| 1168 | bool IsE = true; | |||
| 1169 | unsigned I, N = Cycle.size(); | |||
| 1170 | for (I = 0; I < N; ++I) { | |||
| 1171 | Value *V = Cycle[I]; | |||
| 1172 | if (DivI == V) | |||
| 1173 | IsE = false; | |||
| 1174 | else if (!isa<PHINode>(V)) | |||
| 1175 | continue; | |||
| 1176 | // Stop if found either. | |||
| 1177 | break; | |||
| 1178 | } | |||
| 1179 | // "I" is the index of either DivI or the phi node, whichever was first. | |||
| 1180 | // "E" is "false" or "true" respectively. | |||
| 1181 | ValueSeq &First = !IsE ? Early : Late; | |||
| 1182 | for (unsigned J = 0; J < I; ++J) | |||
| 1183 | First.insert(Cycle[J]); | |||
| 1184 | ||||
| 1185 | ValueSeq &Second = IsE ? Early : Late; | |||
| 1186 | Second.insert(Cycle[I]); | |||
| 1187 | for (++I; I < N; ++I) { | |||
| 1188 | Value *V = Cycle[I]; | |||
| 1189 | if (DivI == V || isa<PHINode>(V)) | |||
| 1190 | break; | |||
| 1191 | Second.insert(V); | |||
| 1192 | } | |||
| 1193 | ||||
| 1194 | for (; I < N; ++I) | |||
| 1195 | First.insert(Cycle[I]); | |||
| 1196 | } | |||
| 1197 | ||||
| 1198 | bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI, | |||
| 1199 | ValueSeq &Early, ValueSeq &Late) { | |||
| 1200 | // Select is an exception, since the condition value does not have to be | |||
| 1201 | // classified in the same way as the true/false values. The true/false | |||
| 1202 | // values do have to be both early or both late. | |||
| 1203 | if (UseI->getOpcode() == Instruction::Select) { | |||
| 1204 | Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2); | |||
| 1205 | if (Early.count(TV) || Early.count(FV)) { | |||
| 1206 | if (Late.count(TV) || Late.count(FV)) | |||
| 1207 | return false; | |||
| 1208 | Early.insert(UseI); | |||
| 1209 | } else if (Late.count(TV) || Late.count(FV)) { | |||
| 1210 | if (Early.count(TV) || Early.count(FV)) | |||
| 1211 | return false; | |||
| 1212 | Late.insert(UseI); | |||
| 1213 | } | |||
| 1214 | return true; | |||
| 1215 | } | |||
| 1216 | ||||
| 1217 | // Not sure what would be the example of this, but the code below relies | |||
| 1218 | // on having at least one operand. | |||
| 1219 | if (UseI->getNumOperands() == 0) | |||
| 1220 | return true; | |||
| 1221 | ||||
| 1222 | bool AE = true, AL = true; | |||
| 1223 | for (auto &I : UseI->operands()) { | |||
| 1224 | if (Early.count(&*I)) | |||
| 1225 | AL = false; | |||
| 1226 | else if (Late.count(&*I)) | |||
| 1227 | AE = false; | |||
| 1228 | } | |||
| 1229 | // If the operands appear "all early" and "all late" at the same time, | |||
| 1230 | // then it means that none of them are actually classified as either. | |||
| 1231 | // This is harmless. | |||
| 1232 | if (AE && AL) | |||
| 1233 | return true; | |||
| 1234 | // Conversely, if they are neither "all early" nor "all late", then | |||
| 1235 | // we have a mixture of early and late operands that is not a known | |||
| 1236 | // exception. | |||
| 1237 | if (!AE && !AL) | |||
| 1238 | return false; | |||
| 1239 | ||||
| 1240 | // Check that we have covered the two special cases. | |||
| 1241 | assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail ("AE != AL", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1241, __extension__ __PRETTY_FUNCTION__)); | |||
| 1242 | ||||
| 1243 | if (AE) | |||
| 1244 | Early.insert(UseI); | |||
| 1245 | else | |||
| 1246 | Late.insert(UseI); | |||
| 1247 | return true; | |||
| 1248 | } | |||
| 1249 | ||||
| 1250 | bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) { | |||
| 1251 | switch (I->getOpcode()) { | |||
| 1252 | case Instruction::And: | |||
| 1253 | case Instruction::Or: | |||
| 1254 | case Instruction::Xor: | |||
| 1255 | case Instruction::LShr: | |||
| 1256 | case Instruction::Shl: | |||
| 1257 | case Instruction::Select: | |||
| 1258 | case Instruction::ICmp: | |||
| 1259 | case Instruction::PHI: | |||
| 1260 | break; | |||
| 1261 | default: | |||
| 1262 | return false; | |||
| 1263 | } | |||
| 1264 | return true; | |||
| 1265 | } | |||
| 1266 | ||||
| 1267 | bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, | |||
| 1268 | unsigned IterCount) { | |||
| 1269 | auto *T = dyn_cast<IntegerType>(V->getType()); | |||
| 1270 | if (!T) | |||
| 1271 | return false; | |||
| 1272 | ||||
| 1273 | KnownBits Known(T->getBitWidth()); | |||
| 1274 | computeKnownBits(V, Known, DL); | |||
| 1275 | return Known.countMinLeadingZeros() >= IterCount; | |||
| 1276 | } | |||
| 1277 | ||||
| 1278 | bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V, | |||
| 1279 | unsigned IterCount) { | |||
| 1280 | // Assume that all inputs to the value have the high bits zero. | |||
| 1281 | // Check if the value itself preserves the zeros in the high bits. | |||
| 1282 | if (auto *C = dyn_cast<ConstantInt>(V)) | |||
| 1283 | return C->getValue().countl_zero() >= IterCount; | |||
| 1284 | ||||
| 1285 | if (auto *I = dyn_cast<Instruction>(V)) { | |||
| 1286 | switch (I->getOpcode()) { | |||
| 1287 | case Instruction::And: | |||
| 1288 | case Instruction::Or: | |||
| 1289 | case Instruction::Xor: | |||
| 1290 | case Instruction::LShr: | |||
| 1291 | case Instruction::Select: | |||
| 1292 | case Instruction::ICmp: | |||
| 1293 | case Instruction::PHI: | |||
| 1294 | case Instruction::ZExt: | |||
| 1295 | return true; | |||
| 1296 | } | |||
| 1297 | } | |||
| 1298 | ||||
| 1299 | return false; | |||
| 1300 | } | |||
| 1301 | ||||
| 1302 | bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) { | |||
| 1303 | unsigned Opc = I->getOpcode(); | |||
| 1304 | if (Opc == Instruction::Shl || Opc == Instruction::LShr) | |||
| 1305 | return Op != I->getOperand(1); | |||
| 1306 | return true; | |||
| 1307 | } | |||
| 1308 | ||||
| 1309 | bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, | |||
| 1310 | BasicBlock *ExitB, unsigned IterCount) { | |||
| 1311 | Value *CIV = getCountIV(LoopB); | |||
| 1312 | if (CIV == nullptr) | |||
| 1313 | return false; | |||
| 1314 | auto *CIVTy = dyn_cast<IntegerType>(CIV->getType()); | |||
| 1315 | if (CIVTy == nullptr) | |||
| 1316 | return false; | |||
| 1317 | ||||
| 1318 | ValueSeq RShifts; | |||
| 1319 | ValueSeq Early, Late, Cycled; | |||
| 1320 | ||||
| 1321 | // Find all value cycles that contain logical right shifts by 1. | |||
| 1322 | for (Instruction &I : *LoopB) { | |||
| 1323 | using namespace PatternMatch; | |||
| 1324 | ||||
| 1325 | Value *V = nullptr; | |||
| 1326 | if (!match(&I, m_LShr(m_Value(V), m_One()))) | |||
| 1327 | continue; | |||
| 1328 | ValueSeq C; | |||
| 1329 | if (!findCycle(&I, V, C)) | |||
| 1330 | continue; | |||
| 1331 | ||||
| 1332 | // Found a cycle. | |||
| 1333 | C.insert(&I); | |||
| 1334 | classifyCycle(&I, C, Early, Late); | |||
| 1335 | Cycled.insert(C.begin(), C.end()); | |||
| 1336 | RShifts.insert(&I); | |||
| 1337 | } | |||
| 1338 | ||||
| 1339 | // Find the set of all values affected by the shift cycles, i.e. all | |||
| 1340 | // cycled values, and (recursively) all their users. | |||
| 1341 | ValueSeq Users(Cycled.begin(), Cycled.end()); | |||
| 1342 | for (unsigned i = 0; i < Users.size(); ++i) { | |||
| 1343 | Value *V = Users[i]; | |||
| 1344 | if (!isa<IntegerType>(V->getType())) | |||
| 1345 | return false; | |||
| 1346 | auto *R = cast<Instruction>(V); | |||
| 1347 | // If the instruction does not commute with shifts, the loop cannot | |||
| 1348 | // be unshifted. | |||
| 1349 | if (!commutesWithShift(R)) | |||
| 1350 | return false; | |||
| 1351 | for (User *U : R->users()) { | |||
| 1352 | auto *T = cast<Instruction>(U); | |||
| 1353 | // Skip users from outside of the loop. They will be handled later. | |||
| 1354 | // Also, skip the right-shifts and phi nodes, since they mix early | |||
| 1355 | // and late values. | |||
| 1356 | if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T)) | |||
| 1357 | continue; | |||
| 1358 | ||||
| 1359 | Users.insert(T); | |||
| 1360 | if (!classifyInst(T, Early, Late)) | |||
| 1361 | return false; | |||
| 1362 | } | |||
| 1363 | } | |||
| 1364 | ||||
| 1365 | if (Users.empty()) | |||
| 1366 | return false; | |||
| 1367 | ||||
| 1368 | // Verify that high bits remain zero. | |||
| 1369 | ValueSeq Internal(Users.begin(), Users.end()); | |||
| 1370 | ValueSeq Inputs; | |||
| 1371 | for (unsigned i = 0; i < Internal.size(); ++i) { | |||
| 1372 | auto *R = dyn_cast<Instruction>(Internal[i]); | |||
| 1373 | if (!R) | |||
| 1374 | continue; | |||
| 1375 | for (Value *Op : R->operands()) { | |||
| 1376 | auto *T = dyn_cast<Instruction>(Op); | |||
| 1377 | if (T && T->getParent() != LoopB) | |||
| 1378 | Inputs.insert(Op); | |||
| 1379 | else | |||
| 1380 | Internal.insert(Op); | |||
| 1381 | } | |||
| 1382 | } | |||
| 1383 | for (Value *V : Inputs) | |||
| 1384 | if (!highBitsAreZero(V, IterCount)) | |||
| 1385 | return false; | |||
| 1386 | for (Value *V : Internal) | |||
| 1387 | if (!keepsHighBitsZero(V, IterCount)) | |||
| 1388 | return false; | |||
| 1389 | ||||
| 1390 | // Finally, the work can be done. Unshift each user. | |||
| 1391 | IRBuilder<> IRB(LoopB); | |||
| 1392 | std::map<Value*,Value*> ShiftMap; | |||
| 1393 | ||||
| 1394 | using CastMapType = std::map<std::pair<Value *, Type *>, Value *>; | |||
| 1395 | ||||
| 1396 | CastMapType CastMap; | |||
| 1397 | ||||
| 1398 | auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V, | |||
| 1399 | IntegerType *Ty) -> Value* { | |||
| 1400 | auto H = CM.find(std::make_pair(V, Ty)); | |||
| 1401 | if (H != CM.end()) | |||
| 1402 | return H->second; | |||
| 1403 | Value *CV = IRB.CreateIntCast(V, Ty, false); | |||
| 1404 | CM.insert(std::make_pair(std::make_pair(V, Ty), CV)); | |||
| 1405 | return CV; | |||
| 1406 | }; | |||
| 1407 | ||||
| 1408 | for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) { | |||
| 1409 | using namespace PatternMatch; | |||
| 1410 | ||||
| 1411 | if (isa<PHINode>(I) || !Users.count(&*I)) | |||
| 1412 | continue; | |||
| 1413 | ||||
| 1414 | // Match lshr x, 1. | |||
| 1415 | Value *V = nullptr; | |||
| 1416 | if (match(&*I, m_LShr(m_Value(V), m_One()))) { | |||
| 1417 | replaceAllUsesOfWithIn(&*I, V, LoopB); | |||
| 1418 | continue; | |||
| 1419 | } | |||
| 1420 | // For each non-cycled operand, replace it with the corresponding | |||
| 1421 | // value shifted left. | |||
| 1422 | for (auto &J : I->operands()) { | |||
| 1423 | Value *Op = J.get(); | |||
| 1424 | if (!isOperandShifted(&*I, Op)) | |||
| 1425 | continue; | |||
| 1426 | if (Users.count(Op)) | |||
| 1427 | continue; | |||
| 1428 | // Skip shifting zeros. | |||
| 1429 | if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) | |||
| 1430 | continue; | |||
| 1431 | // Check if we have already generated a shift for this value. | |||
| 1432 | auto F = ShiftMap.find(Op); | |||
| 1433 | Value *W = (F != ShiftMap.end()) ? F->second : nullptr; | |||
| 1434 | if (W == nullptr) { | |||
| 1435 | IRB.SetInsertPoint(&*I); | |||
| 1436 | // First, the shift amount will be CIV or CIV+1, depending on | |||
| 1437 | // whether the value is early or late. Instead of creating CIV+1, | |||
| 1438 | // do a single shift of the value. | |||
| 1439 | Value *ShAmt = CIV, *ShVal = Op; | |||
| 1440 | auto *VTy = cast<IntegerType>(ShVal->getType()); | |||
| 1441 | auto *ATy = cast<IntegerType>(ShAmt->getType()); | |||
| 1442 | if (Late.count(&*I)) | |||
| 1443 | ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1)); | |||
| 1444 | // Second, the types of the shifted value and the shift amount | |||
| 1445 | // must match. | |||
| 1446 | if (VTy != ATy) { | |||
| 1447 | if (VTy->getBitWidth() < ATy->getBitWidth()) | |||
| 1448 | ShVal = upcast(CastMap, IRB, ShVal, ATy); | |||
| 1449 | else | |||
| 1450 | ShAmt = upcast(CastMap, IRB, ShAmt, VTy); | |||
| 1451 | } | |||
| 1452 | // Ready to generate the shift and memoize it. | |||
| 1453 | W = IRB.CreateShl(ShVal, ShAmt); | |||
| 1454 | ShiftMap.insert(std::make_pair(Op, W)); | |||
| 1455 | } | |||
| 1456 | I->replaceUsesOfWith(Op, W); | |||
| 1457 | } | |||
| 1458 | } | |||
| 1459 | ||||
| 1460 | // Update the users outside of the loop to account for having left | |||
| 1461 | // shifts. They would normally be shifted right in the loop, so shift | |||
| 1462 | // them right after the loop exit. | |||
| 1463 | // Take advantage of the loop-closed SSA form, which has all the post- | |||
| 1464 | // loop values in phi nodes. | |||
| 1465 | IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt()); | |||
| 1466 | for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) { | |||
| 1467 | if (!isa<PHINode>(P)) | |||
| 1468 | break; | |||
| 1469 | auto *PN = cast<PHINode>(P); | |||
| 1470 | Value *U = PN->getIncomingValueForBlock(LoopB); | |||
| 1471 | if (!Users.count(U)) | |||
| 1472 | continue; | |||
| 1473 | Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount)); | |||
| 1474 | PN->replaceAllUsesWith(S); | |||
| 1475 | // The above RAUW will create | |||
| 1476 | // S = lshr S, IterCount | |||
| 1477 | // so we need to fix it back into | |||
| 1478 | // S = lshr PN, IterCount | |||
| 1479 | cast<User>(S)->replaceUsesOfWith(S, PN); | |||
| 1480 | } | |||
| 1481 | ||||
| 1482 | return true; | |||
| 1483 | } | |||
| 1484 | ||||
| 1485 | void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { | |||
| 1486 | for (auto &I : *LoopB) | |||
| 1487 | if (Value *SV = simplifyInstruction(&I, {DL, &TLI, &DT})) | |||
| 1488 | I.replaceAllUsesWith(SV); | |||
| 1489 | ||||
| 1490 | for (Instruction &I : llvm::make_early_inc_range(*LoopB)) | |||
| 1491 | RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI); | |||
| 1492 | } | |||
| 1493 | ||||
| 1494 | unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) { | |||
| 1495 | // Arrays of coefficients of Q and the inverse, C. | |||
| 1496 | // Q[i] = coefficient at x^i. | |||
| 1497 | std::array<char,32> Q, C; | |||
| 1498 | ||||
| 1499 | for (unsigned i = 0; i < 32; ++i) { | |||
| 1500 | Q[i] = QP & 1; | |||
| 1501 | QP >>= 1; | |||
| 1502 | } | |||
| 1503 | assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail ("Q[0] == 1", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1503, __extension__ __PRETTY_FUNCTION__)); | |||
| 1504 | ||||
| 1505 | // Find C, such that | |||
| 1506 | // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1 | |||
| 1507 | // | |||
| 1508 | // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the | |||
| 1509 | // operations * and + are & and ^ respectively. | |||
| 1510 | // | |||
| 1511 | // Find C[i] recursively, by comparing i-th coefficient in the product | |||
| 1512 | // with 0 (or 1 for i=0). | |||
| 1513 | // | |||
| 1514 | // C[0] = 1, since C[0] = Q[0], and Q[0] = 1. | |||
| 1515 | C[0] = 1; | |||
| 1516 | for (unsigned i = 1; i < 32; ++i) { | |||
| 1517 | // Solve for C[i] in: | |||
| 1518 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0 | |||
| 1519 | // This is equivalent to | |||
| 1520 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0 | |||
| 1521 | // which is | |||
| 1522 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i] | |||
| 1523 | unsigned T = 0; | |||
| 1524 | for (unsigned j = 0; j < i; ++j) | |||
| 1525 | T = T ^ (C[j] & Q[i-j]); | |||
| 1526 | C[i] = T; | |||
| 1527 | } | |||
| 1528 | ||||
| 1529 | unsigned QV = 0; | |||
| 1530 | for (unsigned i = 0; i < 32; ++i) | |||
| 1531 | if (C[i]) | |||
| 1532 | QV |= (1 << i); | |||
| 1533 | ||||
| 1534 | return QV; | |||
| 1535 | } | |||
| 1536 | ||||
| 1537 | Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, | |||
| 1538 | ParsedValues &PV) { | |||
| 1539 | IRBuilder<> B(&*At); | |||
| 1540 | Module *M = At->getParent()->getParent()->getParent(); | |||
| 1541 | Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw); | |||
| 1542 | ||||
| 1543 | Value *P = PV.P, *Q = PV.Q, *P0 = P; | |||
| 1544 | unsigned IC = PV.IterCount; | |||
| 1545 | ||||
| 1546 | if (PV.M != nullptr) | |||
| ||||
| 1547 | P0 = P = B.CreateXor(P, PV.M); | |||
| 1548 | ||||
| 1549 | // Create a bit mask to clear the high bits beyond IterCount. | |||
| 1550 | auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC)); | |||
| 1551 | ||||
| 1552 | if (PV.IterCount != 32) | |||
| 1553 | P = B.CreateAnd(P, BMI); | |||
| 1554 | ||||
| 1555 | if (PV.Inv) { | |||
| 1556 | auto *QI = dyn_cast<ConstantInt>(PV.Q); | |||
| 1557 | assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth() <= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1557 , __extension__ __PRETTY_FUNCTION__)); | |||
| 1558 | ||||
| 1559 | // Again, clearing bits beyond IterCount. | |||
| 1560 | unsigned M = (1 << PV.IterCount) - 1; | |||
| ||||
| 1561 | unsigned Tmp = (QI->getZExtValue() | 1) & M; | |||
| 1562 | unsigned QV = getInverseMxN(Tmp) & M; | |||
| 1563 | auto *QVI = ConstantInt::get(QI->getType(), QV); | |||
| 1564 | P = B.CreateCall(PMF, {P, QVI}); | |||
| 1565 | P = B.CreateTrunc(P, QI->getType()); | |||
| 1566 | if (IC != 32) | |||
| 1567 | P = B.CreateAnd(P, BMI); | |||
| 1568 | } | |||
| 1569 | ||||
| 1570 | Value *R = B.CreateCall(PMF, {P, Q}); | |||
| 1571 | ||||
| 1572 | if (PV.M != nullptr) | |||
| 1573 | R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false)); | |||
| 1574 | ||||
| 1575 | return R; | |||
| 1576 | } | |||
| 1577 | ||||
| 1578 | static bool hasZeroSignBit(const Value *V) { | |||
| 1579 | if (const auto *CI = dyn_cast<const ConstantInt>(V)) | |||
| 1580 | return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; | |||
| 1581 | const Instruction *I = dyn_cast<const Instruction>(V); | |||
| 1582 | if (!I) | |||
| 1583 | return false; | |||
| 1584 | switch (I->getOpcode()) { | |||
| 1585 | case Instruction::LShr: | |||
| 1586 | if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1))) | |||
| 1587 | return SI->getZExtValue() > 0; | |||
| 1588 | return false; | |||
| 1589 | case Instruction::Or: | |||
| 1590 | case Instruction::Xor: | |||
| 1591 | return hasZeroSignBit(I->getOperand(0)) && | |||
| 1592 | hasZeroSignBit(I->getOperand(1)); | |||
| 1593 | case Instruction::And: | |||
| 1594 | return hasZeroSignBit(I->getOperand(0)) || | |||
| 1595 | hasZeroSignBit(I->getOperand(1)); | |||
| 1596 | } | |||
| 1597 | return false; | |||
| 1598 | } | |||
| 1599 | ||||
| 1600 | void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { | |||
| 1601 | S.addRule("sink-zext", | |||
| 1602 | // Sink zext past bitwise operations. | |||
| 1603 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1604 | if (I->getOpcode() != Instruction::ZExt) | |||
| 1605 | return nullptr; | |||
| 1606 | Instruction *T = dyn_cast<Instruction>(I->getOperand(0)); | |||
| 1607 | if (!T) | |||
| 1608 | return nullptr; | |||
| 1609 | switch (T->getOpcode()) { | |||
| 1610 | case Instruction::And: | |||
| 1611 | case Instruction::Or: | |||
| 1612 | case Instruction::Xor: | |||
| 1613 | break; | |||
| 1614 | default: | |||
| 1615 | return nullptr; | |||
| 1616 | } | |||
| 1617 | IRBuilder<> B(Ctx); | |||
| 1618 | return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(), | |||
| 1619 | B.CreateZExt(T->getOperand(0), I->getType()), | |||
| 1620 | B.CreateZExt(T->getOperand(1), I->getType())); | |||
| 1621 | }); | |||
| 1622 | S.addRule("xor/and -> and/xor", | |||
| 1623 | // (xor (and x a) (and y a)) -> (and (xor x y) a) | |||
| 1624 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1625 | if (I->getOpcode() != Instruction::Xor) | |||
| 1626 | return nullptr; | |||
| 1627 | Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0)); | |||
| 1628 | Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1)); | |||
| 1629 | if (!And0 || !And1) | |||
| 1630 | return nullptr; | |||
| 1631 | if (And0->getOpcode() != Instruction::And || | |||
| 1632 | And1->getOpcode() != Instruction::And) | |||
| 1633 | return nullptr; | |||
| 1634 | if (And0->getOperand(1) != And1->getOperand(1)) | |||
| 1635 | return nullptr; | |||
| 1636 | IRBuilder<> B(Ctx); | |||
| 1637 | return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), | |||
| 1638 | And0->getOperand(1)); | |||
| 1639 | }); | |||
| 1640 | S.addRule("sink binop into select", | |||
| 1641 | // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) | |||
| 1642 | // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) | |||
| 1643 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1644 | BinaryOperator *BO = dyn_cast<BinaryOperator>(I); | |||
| 1645 | if (!BO) | |||
| 1646 | return nullptr; | |||
| 1647 | Instruction::BinaryOps Op = BO->getOpcode(); | |||
| 1648 | if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) { | |||
| 1649 | IRBuilder<> B(Ctx); | |||
| 1650 | Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue(); | |||
| 1651 | Value *Z = BO->getOperand(1); | |||
| 1652 | return B.CreateSelect(Sel->getCondition(), | |||
| 1653 | B.CreateBinOp(Op, X, Z), | |||
| 1654 | B.CreateBinOp(Op, Y, Z)); | |||
| 1655 | } | |||
| 1656 | if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) { | |||
| 1657 | IRBuilder<> B(Ctx); | |||
| 1658 | Value *X = BO->getOperand(0); | |||
| 1659 | Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue(); | |||
| 1660 | return B.CreateSelect(Sel->getCondition(), | |||
| 1661 | B.CreateBinOp(Op, X, Y), | |||
| 1662 | B.CreateBinOp(Op, X, Z)); | |||
| 1663 | } | |||
| 1664 | return nullptr; | |||
| 1665 | }); | |||
| 1666 | S.addRule("fold select-select", | |||
| 1667 | // (select c (select c x y) z) -> (select c x z) | |||
| 1668 | // (select c x (select c y z)) -> (select c x z) | |||
| 1669 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1670 | SelectInst *Sel = dyn_cast<SelectInst>(I); | |||
| 1671 | if (!Sel) | |||
| 1672 | return nullptr; | |||
| 1673 | IRBuilder<> B(Ctx); | |||
| 1674 | Value *C = Sel->getCondition(); | |||
| 1675 | if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) { | |||
| 1676 | if (Sel0->getCondition() == C) | |||
| 1677 | return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue()); | |||
| 1678 | } | |||
| 1679 | if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) { | |||
| 1680 | if (Sel1->getCondition() == C) | |||
| 1681 | return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue()); | |||
| 1682 | } | |||
| 1683 | return nullptr; | |||
| 1684 | }); | |||
| 1685 | S.addRule("or-signbit -> xor-signbit", | |||
| 1686 | // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) | |||
| 1687 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1688 | if (I->getOpcode() != Instruction::Or) | |||
| 1689 | return nullptr; | |||
| 1690 | ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
| 1691 | if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) | |||
| 1692 | return nullptr; | |||
| 1693 | if (!hasZeroSignBit(I->getOperand(0))) | |||
| 1694 | return nullptr; | |||
| 1695 | return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); | |||
| 1696 | }); | |||
| 1697 | S.addRule("sink lshr into binop", | |||
| 1698 | // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) | |||
| 1699 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1700 | if (I->getOpcode() != Instruction::LShr) | |||
| 1701 | return nullptr; | |||
| 1702 | BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0)); | |||
| 1703 | if (!BitOp) | |||
| 1704 | return nullptr; | |||
| 1705 | switch (BitOp->getOpcode()) { | |||
| 1706 | case Instruction::And: | |||
| 1707 | case Instruction::Or: | |||
| 1708 | case Instruction::Xor: | |||
| 1709 | break; | |||
| 1710 | default: | |||
| 1711 | return nullptr; | |||
| 1712 | } | |||
| 1713 | IRBuilder<> B(Ctx); | |||
| 1714 | Value *S = I->getOperand(1); | |||
| 1715 | return B.CreateBinOp(BitOp->getOpcode(), | |||
| 1716 | B.CreateLShr(BitOp->getOperand(0), S), | |||
| 1717 | B.CreateLShr(BitOp->getOperand(1), S)); | |||
| 1718 | }); | |||
| 1719 | S.addRule("expose bitop-const", | |||
| 1720 | // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) | |||
| 1721 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1722 | auto IsBitOp = [](unsigned Op) -> bool { | |||
| 1723 | switch (Op) { | |||
| 1724 | case Instruction::And: | |||
| 1725 | case Instruction::Or: | |||
| 1726 | case Instruction::Xor: | |||
| 1727 | return true; | |||
| 1728 | } | |||
| 1729 | return false; | |||
| 1730 | }; | |||
| 1731 | BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I); | |||
| 1732 | if (!BitOp1 || !IsBitOp(BitOp1->getOpcode())) | |||
| 1733 | return nullptr; | |||
| 1734 | BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0)); | |||
| 1735 | if (!BitOp2 || !IsBitOp(BitOp2->getOpcode())) | |||
| 1736 | return nullptr; | |||
| 1737 | ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1)); | |||
| 1738 | ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1)); | |||
| 1739 | if (!CA || !CB) | |||
| 1740 | return nullptr; | |||
| 1741 | IRBuilder<> B(Ctx); | |||
| 1742 | Value *X = BitOp2->getOperand(0); | |||
| 1743 | return B.CreateBinOp(BitOp2->getOpcode(), X, | |||
| 1744 | B.CreateBinOp(BitOp1->getOpcode(), CA, CB)); | |||
| 1745 | }); | |||
| 1746 | } | |||
| 1747 | ||||
| 1748 | void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) { | |||
| 1749 | S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a", | |||
| 1750 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
| 1751 | if (I->getOpcode() != Instruction::And) | |||
| 1752 | return nullptr; | |||
| 1753 | Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0)); | |||
| 1754 | ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
| 1755 | if (!Xor || !C0) | |||
| 1756 | return nullptr; | |||
| 1757 | if (Xor->getOpcode() != Instruction::Xor) | |||
| 1758 | return nullptr; | |||
| 1759 | Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0)); | |||
| 1760 | Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1)); | |||
| 1761 | // Pick the first non-null and. | |||
| 1762 | if (!And0 || And0->getOpcode() != Instruction::And) | |||
| 1763 | std::swap(And0, And1); | |||
| 1764 | ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1)); | |||
| 1765 | if (!C1) | |||
| 1766 | return nullptr; | |||
| 1767 | uint32_t V0 = C0->getZExtValue(); | |||
| 1768 | uint32_t V1 = C1->getZExtValue(); | |||
| 1769 | if (V0 != (V0 & V1)) | |||
| 1770 | return nullptr; | |||
| 1771 | IRBuilder<> B(Ctx); | |||
| 1772 | return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0); | |||
| 1773 | }); | |||
| 1774 | } | |||
| 1775 | ||||
| 1776 | bool PolynomialMultiplyRecognize::recognize() { | |||
| 1777 | LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" << *CurLoop << '\n'; } } while (false) | |||
| 1778 | << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" << *CurLoop << '\n'; } } while (false); | |||
| 1779 | // Restrictions: | |||
| 1780 | // - The loop must consist of a single block. | |||
| 1781 | // - The iteration count must be known at compile-time. | |||
| 1782 | // - The loop must have an induction variable starting from 0, and | |||
| 1783 | // incremented in each iteration of the loop. | |||
| 1784 | BasicBlock *LoopB = CurLoop->getHeader(); | |||
| 1785 | LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop header:\n" << * LoopB; } } while (false); | |||
| 1786 | ||||
| 1787 | if (LoopB != CurLoop->getLoopLatch()) | |||
| 1788 | return false; | |||
| 1789 | BasicBlock *ExitB = CurLoop->getExitBlock(); | |||
| 1790 | if (ExitB == nullptr) | |||
| 1791 | return false; | |||
| 1792 | BasicBlock *EntryB = CurLoop->getLoopPreheader(); | |||
| 1793 | if (EntryB == nullptr) | |||
| 1794 | return false; | |||
| 1795 | ||||
| 1796 | unsigned IterCount = 0; | |||
| 1797 | const SCEV *CT = SE.getBackedgeTakenCount(CurLoop); | |||
| 1798 | if (isa<SCEVCouldNotCompute>(CT)) | |||
| 1799 | return false; | |||
| 1800 | if (auto *CV = dyn_cast<SCEVConstant>(CT)) | |||
| 1801 | IterCount = CV->getValue()->getZExtValue() + 1; | |||
| 1802 | ||||
| 1803 | Value *CIV = getCountIV(LoopB); | |||
| 1804 | ParsedValues PV; | |||
| 1805 | Simplifier PreSimp; | |||
| 1806 | PV.IterCount = IterCount; | |||
| 1807 | LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'; } } while ( false) | |||
| 1808 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'; } } while ( false); | |||
| 1809 | ||||
| 1810 | setupPreSimplifier(PreSimp); | |||
| 1811 | ||||
| 1812 | // Perform a preliminary scan of select instructions to see if any of them | |||
| 1813 | // looks like a generator of the polynomial multiply steps. Assume that a | |||
| 1814 | // loop can only contain a single transformable operation, so stop the | |||
| 1815 | // traversal after the first reasonable candidate was found. | |||
| 1816 | // XXX: Currently this approach can modify the loop before being 100% sure | |||
| 1817 | // that the transformation can be carried out. | |||
| 1818 | bool FoundPreScan = false; | |||
| 1819 | auto FeedsPHI = [LoopB](const Value *V) -> bool { | |||
| 1820 | for (const Value *U : V->users()) { | |||
| 1821 | if (const auto *P = dyn_cast<const PHINode>(U)) | |||
| 1822 | if (P->getParent() == LoopB) | |||
| 1823 | return true; | |||
| 1824 | } | |||
| 1825 | return false; | |||
| 1826 | }; | |||
| 1827 | for (Instruction &In : *LoopB) { | |||
| 1828 | SelectInst *SI = dyn_cast<SelectInst>(&In); | |||
| 1829 | if (!SI || !FeedsPHI(SI)) | |||
| 1830 | continue; | |||
| 1831 | ||||
| 1832 | Simplifier::Context C(SI); | |||
| 1833 | Value *T = PreSimp.simplify(C); | |||
| 1834 | SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI; | |||
| 1835 | LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'; } } while (false); | |||
| 1836 | if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { | |||
| 1837 | FoundPreScan = true; | |||
| 1838 | if (SelI != SI) { | |||
| 1839 | Value *NewSel = C.materialize(LoopB, SI->getIterator()); | |||
| 1840 | SI->replaceAllUsesWith(NewSel); | |||
| 1841 | RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); | |||
| 1842 | } | |||
| 1843 | break; | |||
| 1844 | } | |||
| 1845 | } | |||
| 1846 | ||||
| 1847 | if (!FoundPreScan) { | |||
| 1848 | LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n" ; } } while (false); | |||
| 1849 | return false; | |||
| 1850 | } | |||
| 1851 | ||||
| 1852 | if (!PV.Left) { | |||
| 1853 | // The right shift version actually only returns the higher bits of | |||
| 1854 | // the result (each iteration discards the LSB). If we want to convert it | |||
| 1855 | // to a left-shifting loop, the working data type must be at least as | |||
| 1856 | // wide as the target's pmpy instruction. | |||
| 1857 | if (!promoteTypes(LoopB, ExitB)) | |||
| 1858 | return false; | |||
| 1859 | // Run post-promotion simplifications. | |||
| 1860 | Simplifier PostSimp; | |||
| 1861 | setupPostSimplifier(PostSimp); | |||
| 1862 | for (Instruction &In : *LoopB) { | |||
| 1863 | SelectInst *SI = dyn_cast<SelectInst>(&In); | |||
| 1864 | if (!SI || !FeedsPHI(SI)) | |||
| 1865 | continue; | |||
| 1866 | Simplifier::Context C(SI); | |||
| 1867 | Value *T = PostSimp.simplify(C); | |||
| 1868 | SelectInst *SelI = dyn_cast_or_null<SelectInst>(T); | |||
| 1869 | if (SelI != SI) { | |||
| 1870 | Value *NewSel = C.materialize(LoopB, SI->getIterator()); | |||
| 1871 | SI->replaceAllUsesWith(NewSel); | |||
| 1872 | RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); | |||
| 1873 | } | |||
| 1874 | break; | |||
| 1875 | } | |||
| 1876 | ||||
| 1877 | if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) | |||
| 1878 | return false; | |||
| 1879 | cleanupLoopBody(LoopB); | |||
| 1880 | } | |||
| 1881 | ||||
| 1882 | // Scan the loop again, find the generating select instruction. | |||
| 1883 | bool FoundScan = false; | |||
| 1884 | for (Instruction &In : *LoopB) { | |||
| 1885 | SelectInst *SelI = dyn_cast<SelectInst>(&In); | |||
| 1886 | if (!SelI) | |||
| 1887 | continue; | |||
| 1888 | LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI << '\n'; } } while (false); | |||
| 1889 | FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); | |||
| 1890 | if (FoundScan) | |||
| 1891 | break; | |||
| 1892 | } | |||
| 1893 | assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail ("FoundScan", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1893, __extension__ __PRETTY_FUNCTION__)); | |||
| 1894 | ||||
| 1895 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1896 | StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1897 | if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1898 | dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1899 | elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1900 | dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1901 | << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1902 | dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1903 | if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1904 | dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1905 | dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1906 | dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
| 1907 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false); | |||
| 1908 | ||||
| 1909 | BasicBlock::iterator At(EntryB->getTerminator()); | |||
| 1910 | Value *PM = generate(At, PV); | |||
| 1911 | if (PM == nullptr) | |||
| 1912 | return false; | |||
| 1913 | ||||
| 1914 | if (PM->getType() != PV.Res->getType()) | |||
| 1915 | PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false); | |||
| 1916 | ||||
| 1917 | PV.Res->replaceAllUsesWith(PM); | |||
| 1918 | PV.Res->eraseFromParent(); | |||
| 1919 | return true; | |||
| 1920 | } | |||
| 1921 | ||||
| 1922 | int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) { | |||
| 1923 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) | |||
| 1924 | return SC->getAPInt().getSExtValue(); | |||
| 1925 | return 0; | |||
| 1926 | } | |||
| 1927 | ||||
| 1928 | bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) { | |||
| 1929 | // Allow volatile stores if HexagonVolatileMemcpy is enabled. | |||
| 1930 | if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple()) | |||
| 1931 | return false; | |||
| 1932 | ||||
| 1933 | Value *StoredVal = SI->getValueOperand(); | |||
| 1934 | Value *StorePtr = SI->getPointerOperand(); | |||
| 1935 | ||||
| 1936 | // Reject stores that are so large that they overflow an unsigned. | |||
| 1937 | uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); | |||
| 1938 | if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) | |||
| 1939 | return false; | |||
| 1940 | ||||
| 1941 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
| 1942 | // loop, which indicates a strided store. If we have something else, it's a | |||
| 1943 | // random store we can't handle. | |||
| 1944 | auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||
| 1945 | if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) | |||
| 1946 | return false; | |||
| 1947 | ||||
| 1948 | // Check to see if the stride matches the size of the store. If so, then we | |||
| 1949 | // know that every byte is touched in the loop. | |||
| 1950 | int Stride = getSCEVStride(StoreEv); | |||
| 1951 | if (Stride == 0) | |||
| 1952 | return false; | |||
| 1953 | unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); | |||
| 1954 | if (StoreSize != unsigned(std::abs(Stride))) | |||
| 1955 | return false; | |||
| 1956 | ||||
| 1957 | // The store must be feeding a non-volatile load. | |||
| 1958 | LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); | |||
| 1959 | if (!LI || !LI->isSimple()) | |||
| 1960 | return false; | |||
| 1961 | ||||
| 1962 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
| 1963 | // loop, which indicates a strided load. If we have something else, it's a | |||
| 1964 | // random load we can't handle. | |||
| 1965 | Value *LoadPtr = LI->getPointerOperand(); | |||
| 1966 | auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); | |||
| 1967 | if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) | |||
| 1968 | return false; | |||
| 1969 | ||||
| 1970 | // The store and load must share the same stride. | |||
| 1971 | if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) | |||
| 1972 | return false; | |||
| 1973 | ||||
| 1974 | // Success. This store can be converted into a memcpy. | |||
| 1975 | return true; | |||
| 1976 | } | |||
| 1977 | ||||
| 1978 | /// mayLoopAccessLocation - Return true if the specified loop might access the | |||
| 1979 | /// specified pointer location, which is a loop-strided access. The 'Access' | |||
| 1980 | /// argument specifies what the verboten forms of access are (read or write). | |||
| 1981 | static bool | |||
| 1982 | mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, | |||
| 1983 | const SCEV *BECount, unsigned StoreSize, | |||
| 1984 | AliasAnalysis &AA, | |||
| 1985 | SmallPtrSetImpl<Instruction *> &Ignored) { | |||
| 1986 | // Get the location that may be stored across the loop. Since the access | |||
| 1987 | // is strided positively through memory, we say that the modified location | |||
| 1988 | // starts at the pointer and has infinite size. | |||
| 1989 | LocationSize AccessSize = LocationSize::afterPointer(); | |||
| 1990 | ||||
| 1991 | // If the loop iterates a fixed number of times, we can refine the access | |||
| 1992 | // size to be exactly the size of the memset, which is (BECount+1)*StoreSize | |||
| 1993 | if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) | |||
| 1994 | AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * | |||
| 1995 | StoreSize); | |||
| 1996 | ||||
| 1997 | // TODO: For this to be really effective, we have to dive into the pointer | |||
| 1998 | // operand in the store. Store to &A[i] of 100 will always return may alias | |||
| 1999 | // with store of &A[100], we need to StoreLoc to be "A" with size of 100, | |||
| 2000 | // which will then no-alias a store to &A[100]. | |||
| 2001 | MemoryLocation StoreLoc(Ptr, AccessSize); | |||
| 2002 | ||||
| 2003 | for (auto *B : L->blocks()) | |||
| 2004 | for (auto &I : *B) | |||
| 2005 | if (Ignored.count(&I) == 0 && | |||
| 2006 | isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access)) | |||
| 2007 | return true; | |||
| 2008 | ||||
| 2009 | return false; | |||
| 2010 | } | |||
| 2011 | ||||
| 2012 | void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB, | |||
| 2013 | SmallVectorImpl<StoreInst*> &Stores) { | |||
| 2014 | Stores.clear(); | |||
| 2015 | for (Instruction &I : *BB) | |||
| 2016 | if (StoreInst *SI = dyn_cast<StoreInst>(&I)) | |||
| 2017 | if (isLegalStore(CurLoop, SI)) | |||
| 2018 | Stores.push_back(SI); | |||
| 2019 | } | |||
| 2020 | ||||
| 2021 | bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, | |||
| 2022 | StoreInst *SI, const SCEV *BECount) { | |||
| 2023 | assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025 , __extension__ __PRETTY_FUNCTION__)) | |||
| 2024 | "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025 , __extension__ __PRETTY_FUNCTION__)) | |||
| 2025 | "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025 , __extension__ __PRETTY_FUNCTION__)); | |||
| 2026 | ||||
| 2027 | Value *StorePtr = SI->getPointerOperand(); | |||
| 2028 | auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||
| 2029 | unsigned Stride = getSCEVStride(StoreEv); | |||
| 2030 | unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); | |||
| 2031 | if (Stride != StoreSize) | |||
| 2032 | return false; | |||
| 2033 | ||||
| 2034 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
| 2035 | // loop, which indicates a strided load. If we have something else, it's a | |||
| 2036 | // random load we can't handle. | |||
| 2037 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||
| 2038 | auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); | |||
| 2039 | ||||
| 2040 | // The trip count of the loop and the base pointer of the addrec SCEV is | |||
| 2041 | // guaranteed to be loop invariant, which means that it should dominate the | |||
| 2042 | // header. This allows us to insert code for it in the preheader. | |||
| 2043 | BasicBlock *Preheader = CurLoop->getLoopPreheader(); | |||
| 2044 | Instruction *ExpPt = Preheader->getTerminator(); | |||
| 2045 | IRBuilder<> Builder(ExpPt); | |||
| 2046 | SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom"); | |||
| 2047 | ||||
| 2048 | Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace()); | |||
| 2049 | ||||
| 2050 | // Okay, we have a strided store "p[i]" of a loaded value. We can turn | |||
| 2051 | // this into a memcpy/memmove in the loop preheader now if we want. However, | |||
| 2052 | // this would be unsafe to do if there is anything else in the loop that may | |||
| 2053 | // read or write the memory region we're storing to. For memcpy, this | |||
| 2054 | // includes the load that feeds the stores. Check for an alias by generating | |||
| 2055 | // the base address and checking everything. | |||
| 2056 | Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), | |||
| 2057 | Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt); | |||
| 2058 | Value *LoadBasePtr = nullptr; | |||
| 2059 | ||||
| 2060 | bool Overlap = false; | |||
| 2061 | bool DestVolatile = SI->isVolatile(); | |||
| 2062 | Type *BECountTy = BECount->getType(); | |||
| 2063 | ||||
| 2064 | if (DestVolatile) { | |||
| 2065 | // The trip count must fit in i32, since it is the type of the "num_words" | |||
| 2066 | // argument to hexagon_memcpy_forward_vp4cp4n2. | |||
| 2067 | if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) { | |||
| 2068 | CleanupAndExit: | |||
| 2069 | // If we generated new code for the base pointer, clean up. | |||
| 2070 | Expander.clear(); | |||
| 2071 | if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) { | |||
| 2072 | RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); | |||
| 2073 | StoreBasePtr = nullptr; | |||
| 2074 | } | |||
| 2075 | if (LoadBasePtr) { | |||
| 2076 | RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); | |||
| 2077 | LoadBasePtr = nullptr; | |||
| 2078 | } | |||
| 2079 | return false; | |||
| 2080 | } | |||
| 2081 | } | |||
| 2082 | ||||
| 2083 | SmallPtrSet<Instruction*, 2> Ignore1; | |||
| 2084 | Ignore1.insert(SI); | |||
| 2085 | if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, | |||
| 2086 | StoreSize, *AA, Ignore1)) { | |||
| 2087 | // Check if the load is the offending instruction. | |||
| 2088 | Ignore1.insert(LI); | |||
| 2089 | if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, | |||
| 2090 | BECount, StoreSize, *AA, Ignore1)) { | |||
| 2091 | // Still bad. Nothing we can do. | |||
| 2092 | goto CleanupAndExit; | |||
| 2093 | } | |||
| 2094 | // It worked with the load ignored. | |||
| 2095 | Overlap = true; | |||
| 2096 | } | |||
| 2097 | ||||
| 2098 | if (!Overlap) { | |||
| 2099 | if (DisableMemcpyIdiom || !HasMemcpy) | |||
| 2100 | goto CleanupAndExit; | |||
| 2101 | } else { | |||
| 2102 | // Don't generate memmove if this function will be inlined. This is | |||
| 2103 | // because the caller will undergo this transformation after inlining. | |||
| 2104 | Function *Func = CurLoop->getHeader()->getParent(); | |||
| 2105 | if (Func->hasFnAttribute(Attribute::AlwaysInline)) | |||
| 2106 | goto CleanupAndExit; | |||
| 2107 | ||||
| 2108 | // In case of a memmove, the call to memmove will be executed instead | |||
| 2109 | // of the loop, so we need to make sure that there is nothing else in | |||
| 2110 | // the loop than the load, store and instructions that these two depend | |||
| 2111 | // on. | |||
| 2112 | SmallVector<Instruction*,2> Insts; | |||
| 2113 | Insts.push_back(SI); | |||
| 2114 | Insts.push_back(LI); | |||
| 2115 | if (!coverLoop(CurLoop, Insts)) | |||
| 2116 | goto CleanupAndExit; | |||
| 2117 | ||||
| 2118 | if (DisableMemmoveIdiom || !HasMemmove) | |||
| 2119 | goto CleanupAndExit; | |||
| 2120 | bool IsNested = CurLoop->getParentLoop() != nullptr; | |||
| 2121 | if (IsNested && OnlyNonNestedMemmove) | |||
| 2122 | goto CleanupAndExit; | |||
| 2123 | } | |||
| 2124 | ||||
| 2125 | // For a memcpy, we have to make sure that the input array is not being | |||
| 2126 | // mutated by the loop. | |||
| 2127 | LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), | |||
| 2128 | Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt); | |||
| 2129 | ||||
| 2130 | SmallPtrSet<Instruction*, 2> Ignore2; | |||
| 2131 | Ignore2.insert(SI); | |||
| 2132 | if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, | |||
| 2133 | StoreSize, *AA, Ignore2)) | |||
| 2134 | goto CleanupAndExit; | |||
| 2135 | ||||
| 2136 | // Check the stride. | |||
| 2137 | bool StridePos = getSCEVStride(LoadEv) >= 0; | |||
| 2138 | ||||
| 2139 | // Currently, the volatile memcpy only emulates traversing memory forward. | |||
| 2140 | if (!StridePos && DestVolatile) | |||
| 2141 | goto CleanupAndExit; | |||
| 2142 | ||||
| 2143 | bool RuntimeCheck = (Overlap || DestVolatile); | |||
| 2144 | ||||
| 2145 | BasicBlock *ExitB; | |||
| 2146 | if (RuntimeCheck) { | |||
| 2147 | // The runtime check needs a single exit block. | |||
| 2148 | SmallVector<BasicBlock*, 8> ExitBlocks; | |||
| 2149 | CurLoop->getUniqueExitBlocks(ExitBlocks); | |||
| 2150 | if (ExitBlocks.size() != 1) | |||
| 2151 | goto CleanupAndExit; | |||
| 2152 | ExitB = ExitBlocks[0]; | |||
| 2153 | } | |||
| 2154 | ||||
| 2155 | // The # stored bytes is (BECount+1)*Size. Expand the trip count out to | |||
| 2156 | // pointer size if it isn't already. | |||
| 2157 | LLVMContext &Ctx = SI->getContext(); | |||
| 2158 | BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); | |||
| 2159 | DebugLoc DLoc = SI->getDebugLoc(); | |||
| 2160 | ||||
| 2161 | const SCEV *NumBytesS = | |||
| 2162 | SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); | |||
| 2163 | if (StoreSize != 1) | |||
| 2164 | NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), | |||
| 2165 | SCEV::FlagNUW); | |||
| 2166 | Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); | |||
| 2167 | if (Instruction *In = dyn_cast<Instruction>(NumBytes)) | |||
| 2168 | if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT})) | |||
| 2169 | NumBytes = Simp; | |||
| 2170 | ||||
| 2171 | CallInst *NewCall; | |||
| 2172 | ||||
| 2173 | if (RuntimeCheck) { | |||
| 2174 | unsigned Threshold = RuntimeMemSizeThreshold; | |||
| 2175 | if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) { | |||
| 2176 | uint64_t C = CI->getZExtValue(); | |||
| 2177 | if (Threshold != 0 && C < Threshold) | |||
| 2178 | goto CleanupAndExit; | |||
| 2179 | if (C < CompileTimeMemSizeThreshold) | |||
| 2180 | goto CleanupAndExit; | |||
| 2181 | } | |||
| 2182 | ||||
| 2183 | BasicBlock *Header = CurLoop->getHeader(); | |||
| 2184 | Function *Func = Header->getParent(); | |||
| 2185 | Loop *ParentL = LF->getLoopFor(Preheader); | |||
| 2186 | StringRef HeaderName = Header->getName(); | |||
| 2187 | ||||
| 2188 | // Create a new (empty) preheader, and update the PHI nodes in the | |||
| 2189 | // header to use the new preheader. | |||
| 2190 | BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph", | |||
| 2191 | Func, Header); | |||
| 2192 | if (ParentL) | |||
| 2193 | ParentL->addBasicBlockToLoop(NewPreheader, *LF); | |||
| 2194 | IRBuilder<>(NewPreheader).CreateBr(Header); | |||
| 2195 | for (auto &In : *Header) { | |||
| 2196 | PHINode *PN = dyn_cast<PHINode>(&In); | |||
| 2197 | if (!PN) | |||
| 2198 | break; | |||
| 2199 | int bx = PN->getBasicBlockIndex(Preheader); | |||
| 2200 | if (bx >= 0) | |||
| 2201 | PN->setIncomingBlock(bx, NewPreheader); | |||
| 2202 | } | |||
| 2203 | DT->addNewBlock(NewPreheader, Preheader); | |||
| 2204 | DT->changeImmediateDominator(Header, NewPreheader); | |||
| 2205 | ||||
| 2206 | // Check for safe conditions to execute memmove. | |||
| 2207 | // If stride is positive, copying things from higher to lower addresses | |||
| 2208 | // is equivalent to memmove. For negative stride, it's the other way | |||
| 2209 | // around. Copying forward in memory with positive stride may not be | |||
| 2210 | // same as memmove since we may be copying values that we just stored | |||
| 2211 | // in some previous iteration. | |||
| 2212 | Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy); | |||
| 2213 | Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy); | |||
| 2214 | Value *LowA = StridePos ? SA : LA; | |||
| 2215 | Value *HighA = StridePos ? LA : SA; | |||
| 2216 | Value *CmpA = Builder.CreateICmpULT(LowA, HighA); | |||
| 2217 | Value *Cond = CmpA; | |||
| 2218 | ||||
| 2219 | // Check for distance between pointers. Since the case LowA < HighA | |||
| 2220 | // is checked for above, assume LowA >= HighA. | |||
| 2221 | Value *Dist = Builder.CreateSub(LowA, HighA); | |||
| 2222 | Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist); | |||
| 2223 | Value *CmpEither = Builder.CreateOr(Cond, CmpD); | |||
| 2224 | Cond = CmpEither; | |||
| 2225 | ||||
| 2226 | if (Threshold != 0) { | |||
| 2227 | Type *Ty = NumBytes->getType(); | |||
| 2228 | Value *Thr = ConstantInt::get(Ty, Threshold); | |||
| 2229 | Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes); | |||
| 2230 | Value *CmpBoth = Builder.CreateAnd(Cond, CmpB); | |||
| 2231 | Cond = CmpBoth; | |||
| 2232 | } | |||
| 2233 | BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli", | |||
| 2234 | Func, NewPreheader); | |||
| 2235 | if (ParentL) | |||
| 2236 | ParentL->addBasicBlockToLoop(MemmoveB, *LF); | |||
| 2237 | Instruction *OldT = Preheader->getTerminator(); | |||
| 2238 | Builder.CreateCondBr(Cond, MemmoveB, NewPreheader); | |||
| 2239 | OldT->eraseFromParent(); | |||
| 2240 | Preheader->setName(Preheader->getName()+".old"); | |||
| 2241 | DT->addNewBlock(MemmoveB, Preheader); | |||
| 2242 | // Find the new immediate dominator of the exit block. | |||
| 2243 | BasicBlock *ExitD = Preheader; | |||
| 2244 | for (BasicBlock *PB : predecessors(ExitB)) { | |||
| 2245 | ExitD = DT->findNearestCommonDominator(ExitD, PB); | |||
| 2246 | if (!ExitD) | |||
| 2247 | break; | |||
| 2248 | } | |||
| 2249 | // If the prior immediate dominator of ExitB was dominated by the | |||
| 2250 | // old preheader, then the old preheader becomes the new immediate | |||
| 2251 | // dominator. Otherwise don't change anything (because the newly | |||
| 2252 | // added blocks are dominated by the old preheader). | |||
| 2253 | if (ExitD && DT->dominates(Preheader, ExitD)) { | |||
| 2254 | DomTreeNode *BN = DT->getNode(ExitB); | |||
| 2255 | DomTreeNode *DN = DT->getNode(ExitD); | |||
| 2256 | BN->setIDom(DN); | |||
| 2257 | } | |||
| 2258 | ||||
| 2259 | // Add a call to memmove to the conditional block. | |||
| 2260 | IRBuilder<> CondBuilder(MemmoveB); | |||
| 2261 | CondBuilder.CreateBr(ExitB); | |||
| 2262 | CondBuilder.SetInsertPoint(MemmoveB->getTerminator()); | |||
| 2263 | ||||
| 2264 | if (DestVolatile) { | |||
| 2265 | Type *Int32Ty = Type::getInt32Ty(Ctx); | |||
| 2266 | Type *Int32PtrTy = Type::getInt32PtrTy(Ctx); | |||
| 2267 | Type *VoidTy = Type::getVoidTy(Ctx); | |||
| 2268 | Module *M = Func->getParent(); | |||
| 2269 | FunctionCallee Fn = M->getOrInsertFunction( | |||
| 2270 | HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty); | |||
| 2271 | ||||
| 2272 | const SCEV *OneS = SE->getConstant(Int32Ty, 1); | |||
| 2273 | const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty); | |||
| 2274 | const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW); | |||
| 2275 | Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, | |||
| 2276 | MemmoveB->getTerminator()); | |||
| 2277 | if (Instruction *In = dyn_cast<Instruction>(NumWords)) | |||
| 2278 | if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT})) | |||
| 2279 | NumWords = Simp; | |||
| 2280 | ||||
| 2281 | Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) | |||
| 2282 | ? StoreBasePtr | |||
| 2283 | : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy); | |||
| 2284 | Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy) | |||
| 2285 | ? LoadBasePtr | |||
| 2286 | : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); | |||
| 2287 | NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); | |||
| 2288 | } else { | |||
| 2289 | NewCall = CondBuilder.CreateMemMove( | |||
| 2290 | StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes); | |||
| 2291 | } | |||
| 2292 | } else { | |||
| 2293 | NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr, | |||
| 2294 | LI->getAlign(), NumBytes); | |||
| 2295 | // Okay, the memcpy has been formed. Zap the original store and | |||
| 2296 | // anything that feeds into it. | |||
| 2297 | RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); | |||
| 2298 | } | |||
| 2299 | ||||
| 2300 | NewCall->setDebugLoc(DLoc); | |||
| 2301 | ||||
| 2302 | LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
| 2303 | << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
| 2304 | << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
| 2305 | << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
| 2306 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ); | |||
| 2307 | ||||
| 2308 | return true; | |||
| 2309 | } | |||
| 2310 | ||||
| 2311 | // Check if the instructions in Insts, together with their dependencies | |||
| 2312 | // cover the loop in the sense that the loop could be safely eliminated once | |||
| 2313 | // the instructions in Insts are removed. | |||
| 2314 | bool HexagonLoopIdiomRecognize::coverLoop(Loop *L, | |||
| 2315 | SmallVectorImpl<Instruction*> &Insts) const { | |||
| 2316 | SmallSet<BasicBlock*,8> LoopBlocks; | |||
| 2317 | for (auto *B : L->blocks()) | |||
| 2318 | LoopBlocks.insert(B); | |||
| 2319 | ||||
| 2320 | SetVector<Instruction*> Worklist(Insts.begin(), Insts.end()); | |||
| 2321 | ||||
| 2322 | // Collect all instructions from the loop that the instructions in Insts | |||
| 2323 | // depend on (plus their dependencies, etc.). These instructions will | |||
| 2324 | // constitute the expression trees that feed those in Insts, but the trees | |||
| 2325 | // will be limited only to instructions contained in the loop. | |||
| 2326 | for (unsigned i = 0; i < Worklist.size(); ++i) { | |||
| 2327 | Instruction *In = Worklist[i]; | |||
| 2328 | for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) { | |||
| 2329 | Instruction *OpI = dyn_cast<Instruction>(I); | |||
| 2330 | if (!OpI) | |||
| 2331 | continue; | |||
| 2332 | BasicBlock *PB = OpI->getParent(); | |||
| 2333 | if (!LoopBlocks.count(PB)) | |||
| 2334 | continue; | |||
| 2335 | Worklist.insert(OpI); | |||
| 2336 | } | |||
| 2337 | } | |||
| 2338 | ||||
| 2339 | // Scan all instructions in the loop, if any of them have a user outside | |||
| 2340 | // of the loop, or outside of the expressions collected above, then either | |||
| 2341 | // the loop has a side-effect visible outside of it, or there are | |||
| 2342 | // instructions in it that are not involved in the original set Insts. | |||
| 2343 | for (auto *B : L->blocks()) { | |||
| 2344 | for (auto &In : *B) { | |||
| 2345 | if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In)) | |||
| 2346 | continue; | |||
| 2347 | if (!Worklist.count(&In) && In.mayHaveSideEffects()) | |||
| 2348 | return false; | |||
| 2349 | for (auto *K : In.users()) { | |||
| 2350 | Instruction *UseI = dyn_cast<Instruction>(K); | |||
| 2351 | if (!UseI) | |||
| 2352 | continue; | |||
| 2353 | BasicBlock *UseB = UseI->getParent(); | |||
| 2354 | if (LF->getLoopFor(UseB) != L) | |||
| 2355 | return false; | |||
| 2356 | } | |||
| 2357 | } | |||
| 2358 | } | |||
| 2359 | ||||
| 2360 | return true; | |||
| 2361 | } | |||
| 2362 | ||||
| 2363 | /// runOnLoopBlock - Process the specified block, which lives in a counted loop | |||
| 2364 | /// with the specified backedge count. This block is known to be in the current | |||
| 2365 | /// loop and not in any subloops. | |||
| 2366 | bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, | |||
| 2367 | const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) { | |||
| 2368 | // We can only promote stores in this block if they are unconditionally | |||
| 2369 | // executed in the loop. For a block to be unconditionally executed, it has | |||
| 2370 | // to dominate all the exit blocks of the loop. Verify this now. | |||
| 2371 | auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool { | |||
| 2372 | return DT->dominates(BB, EB); | |||
| 2373 | }; | |||
| 2374 | if (!all_of(ExitBlocks, DominatedByBB)) | |||
| 2375 | return false; | |||
| 2376 | ||||
| 2377 | bool MadeChange = false; | |||
| 2378 | // Look for store instructions, which may be optimized to memset/memcpy. | |||
| 2379 | SmallVector<StoreInst*,8> Stores; | |||
| 2380 | collectStores(CurLoop, BB, Stores); | |||
| 2381 | ||||
| 2382 | // Optimize the store into a memcpy, if it feeds an similarly strided load. | |||
| 2383 | for (auto &SI : Stores) | |||
| 2384 | MadeChange |= processCopyingStore(CurLoop, SI, BECount); | |||
| 2385 | ||||
| 2386 | return MadeChange; | |||
| 2387 | } | |||
| 2388 | ||||
| 2389 | bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) { | |||
| 2390 | PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE); | |||
| 2391 | if (PMR.recognize()) | |||
| 2392 | return true; | |||
| 2393 | ||||
| 2394 | if (!HasMemcpy && !HasMemmove) | |||
| 2395 | return false; | |||
| 2396 | ||||
| 2397 | const SCEV *BECount = SE->getBackedgeTakenCount(L); | |||
| 2398 | assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400 , __extension__ __PRETTY_FUNCTION__)) | |||
| 2399 | "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400 , __extension__ __PRETTY_FUNCTION__)) | |||
| 2400 | "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400 , __extension__ __PRETTY_FUNCTION__)); | |||
| 2401 | ||||
| 2402 | SmallVector<BasicBlock *, 8> ExitBlocks; | |||
| 2403 | L->getUniqueExitBlocks(ExitBlocks); | |||
| 2404 | ||||
| 2405 | bool Changed = false; | |||
| 2406 | ||||
| 2407 | // Scan all the blocks in the loop that are not in subloops. | |||
| 2408 | for (auto *BB : L->getBlocks()) { | |||
| 2409 | // Ignore blocks in subloops. | |||
| 2410 | if (LF->getLoopFor(BB) != L) | |||
| 2411 | continue; | |||
| 2412 | Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks); | |||
| 2413 | } | |||
| 2414 | ||||
| 2415 | return Changed; | |||
| 2416 | } | |||
| 2417 | ||||
| 2418 | bool HexagonLoopIdiomRecognize::run(Loop *L) { | |||
| 2419 | const Module &M = *L->getHeader()->getParent()->getParent(); | |||
| 2420 | if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon) | |||
| 2421 | return false; | |||
| 2422 | ||||
| 2423 | // If the loop could not be converted to canonical form, it must have an | |||
| 2424 | // indirectbr in it, just give up. | |||
| 2425 | if (!L->getLoopPreheader()) | |||
| 2426 | return false; | |||
| 2427 | ||||
| 2428 | // Disable loop idiom recognition if the function's name is a common idiom. | |||
| 2429 | StringRef Name = L->getHeader()->getParent()->getName(); | |||
| 2430 | if (Name == "memset" || Name == "memcpy" || Name == "memmove") | |||
| 2431 | return false; | |||
| 2432 | ||||
| 2433 | DL = &L->getHeader()->getModule()->getDataLayout(); | |||
| 2434 | ||||
| 2435 | HasMemcpy = TLI->has(LibFunc_memcpy); | |||
| 2436 | HasMemmove = TLI->has(LibFunc_memmove); | |||
| 2437 | ||||
| 2438 | if (SE->hasLoopInvariantBackedgeTakenCount(L)) | |||
| 2439 | return runOnCountableLoop(L); | |||
| 2440 | return false; | |||
| 2441 | } | |||
| 2442 | ||||
| 2443 | bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L, | |||
| 2444 | LPPassManager &LPM) { | |||
| 2445 | if (skipLoop(L)) | |||
| 2446 | return false; | |||
| 2447 | ||||
| 2448 | auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
| 2449 | auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
| 2450 | auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | |||
| 2451 | auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( | |||
| 2452 | *L->getHeader()->getParent()); | |||
| 2453 | auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); | |||
| 2454 | return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L); | |||
| 2455 | } | |||
| 2456 | ||||
| 2457 | Pass *llvm::createHexagonLoopIdiomPass() { | |||
| 2458 | return new HexagonLoopIdiomRecognizeLegacyPass(); | |||
| 2459 | } | |||
| 2460 | ||||
| 2461 | PreservedAnalyses | |||
| 2462 | HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM, | |||
| 2463 | LoopStandardAnalysisResults &AR, | |||
| 2464 | LPMUpdater &U) { | |||
| 2465 | return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE) | |||
| 2466 | .run(&L) | |||
| 2467 | ? getLoopPassPreservedAnalyses() | |||
| 2468 | : PreservedAnalyses::all(); | |||
| 2469 | } |
| 1 | //===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===// | ||||||||
| 2 | // | ||||||||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
| 6 | // | ||||||||
| 7 | //===----------------------------------------------------------------------===// | ||||||||
| 8 | /// | ||||||||
| 9 | /// \file | ||||||||
| 10 | /// This file implements a class to represent arbitrary precision | ||||||||
| 11 | /// integral constant values and operations on them. | ||||||||
| 12 | /// | ||||||||
| 13 | //===----------------------------------------------------------------------===// | ||||||||
| 14 | |||||||||
| 15 | #ifndef LLVM_ADT_APINT_H | ||||||||
| 16 | #define LLVM_ADT_APINT_H | ||||||||
| 17 | |||||||||
| 18 | #include "llvm/Support/Compiler.h" | ||||||||
| 19 | #include "llvm/Support/MathExtras.h" | ||||||||
| 20 | #include <cassert> | ||||||||
| 21 | #include <climits> | ||||||||
| 22 | #include <cstring> | ||||||||
| 23 | #include <optional> | ||||||||
| 24 | #include <utility> | ||||||||
| 25 | |||||||||
| 26 | namespace llvm { | ||||||||
| 27 | class FoldingSetNodeID; | ||||||||
| 28 | class StringRef; | ||||||||
| 29 | class hash_code; | ||||||||
| 30 | class raw_ostream; | ||||||||
| 31 | |||||||||
| 32 | template <typename T> class SmallVectorImpl; | ||||||||
| 33 | template <typename T> class ArrayRef; | ||||||||
| 34 | template <typename T, typename Enable> struct DenseMapInfo; | ||||||||
| 35 | |||||||||
| 36 | class APInt; | ||||||||
| 37 | |||||||||
| 38 | inline APInt operator-(APInt); | ||||||||
| 39 | |||||||||
| 40 | //===----------------------------------------------------------------------===// | ||||||||
| 41 | // APInt Class | ||||||||
| 42 | //===----------------------------------------------------------------------===// | ||||||||
| 43 | |||||||||
| 44 | /// Class for arbitrary precision integers. | ||||||||
| 45 | /// | ||||||||
| 46 | /// APInt is a functional replacement for common case unsigned integer type like | ||||||||
| 47 | /// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width | ||||||||
| 48 | /// integer sizes and large integer value types such as 3-bits, 15-bits, or more | ||||||||
| 49 | /// than 64-bits of precision. APInt provides a variety of arithmetic operators | ||||||||
| 50 | /// and methods to manipulate integer values of any bit-width. It supports both | ||||||||
| 51 | /// the typical integer arithmetic and comparison operations as well as bitwise | ||||||||
| 52 | /// manipulation. | ||||||||
| 53 | /// | ||||||||
| 54 | /// The class has several invariants worth noting: | ||||||||
| 55 | /// * All bit, byte, and word positions are zero-based. | ||||||||
| 56 | /// * Once the bit width is set, it doesn't change except by the Truncate, | ||||||||
| 57 | /// SignExtend, or ZeroExtend operations. | ||||||||
| 58 | /// * All binary operators must be on APInt instances of the same bit width. | ||||||||
| 59 | /// Attempting to use these operators on instances with different bit | ||||||||
| 60 | /// widths will yield an assertion. | ||||||||
| 61 | /// * The value is stored canonically as an unsigned value. For operations | ||||||||
| 62 | /// where it makes a difference, there are both signed and unsigned variants | ||||||||
| 63 | /// of the operation. For example, sdiv and udiv. However, because the bit | ||||||||
| 64 | /// widths must be the same, operations such as Mul and Add produce the same | ||||||||
| 65 | /// results regardless of whether the values are interpreted as signed or | ||||||||
| 66 | /// not. | ||||||||
| 67 | /// * In general, the class tries to follow the style of computation that LLVM | ||||||||
| 68 | /// uses in its IR. This simplifies its use for LLVM. | ||||||||
| 69 | /// * APInt supports zero-bit-width values, but operations that require bits | ||||||||
| 70 | /// are not defined on it (e.g. you cannot ask for the sign of a zero-bit | ||||||||
| 71 | /// integer). This means that operations like zero extension and logical | ||||||||
| 72 | /// shifts are defined, but sign extension and ashr are not. Zero bit values | ||||||||
| 73 | /// compare and hash equal to themselves, and countLeadingZeros returns 0. | ||||||||
| 74 | /// | ||||||||
| 75 | class [[nodiscard]] APInt { | ||||||||
| 76 | public: | ||||||||
/// Unit in which APInt sizes its storage; APINT_WORD_SIZE is
/// sizeof(WordType).
using WordType = uint64_t;
| 78 | |||||||||
| 79 | /// This enum is used to hold the constants we needed for APInt. | ||||||||
| 80 | enum : unsigned { | ||||||||
| 81 | /// Byte size of a word. | ||||||||
| 82 | APINT_WORD_SIZE = sizeof(WordType), | ||||||||
| 83 | /// Bits in a word. | ||||||||
| 84 | APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8 | ||||||||
| 85 | }; | ||||||||
| 86 | |||||||||
/// Rounding modes. The explicit values match the declaration order.
enum class Rounding {
  DOWN = 0,
  TOWARD_ZERO = 1,
  UP = 2,
};
| 92 | |||||||||
| 93 | static constexpr WordType WORDTYPE_MAX = ~WordType(0); | ||||||||
| 94 | |||||||||
| 95 | /// \name Constructors | ||||||||
| 96 | /// @{ | ||||||||
| 97 | |||||||||
| 98 | /// Create a new APInt of numBits width, initialized as val. | ||||||||
| 99 | /// | ||||||||
| 100 | /// If isSigned is true then val is treated as if it were a signed value | ||||||||
| 101 | /// (i.e. as an int64_t) and the appropriate sign extension to the bit width | ||||||||
| 102 | /// will be done. Otherwise, no sign extension occurs (high order bits beyond | ||||||||
| 103 | /// the range of val are zero filled). | ||||||||
| 104 | /// | ||||||||
| 105 | /// \param numBits the bit width of the constructed APInt | ||||||||
| 106 | /// \param val the initial value of the APInt | ||||||||
| 107 | /// \param isSigned how to treat signedness of val | ||||||||
| 108 | APInt(unsigned numBits, uint64_t val, bool isSigned = false) | ||||||||
| 109 | : BitWidth(numBits) { | ||||||||
| 110 | if (isSingleWord()) { | ||||||||
| 111 | U.VAL = val; | ||||||||
| 112 | clearUnusedBits(); | ||||||||
| 113 | } else { | ||||||||
| 114 | initSlowCase(val, isSigned); | ||||||||
| 115 | } | ||||||||
| 116 | } | ||||||||
| 117 | |||||||||
| 118 | /// Construct an APInt of numBits width, initialized as bigVal[]. | ||||||||
| 119 | /// | ||||||||
| 120 | /// Note that bigVal.size() can be smaller or larger than the corresponding | ||||||||
| 121 | /// bit width but any extraneous bits will be dropped. | ||||||||
| 122 | /// | ||||||||
| 123 | /// \param numBits the bit width of the constructed APInt | ||||||||
| 124 | /// \param bigVal a sequence of words to form the initial value of the APInt | ||||||||
| 125 | APInt(unsigned numBits, ArrayRef<uint64_t> bigVal); | ||||||||
| 126 | |||||||||
| 127 | /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but | ||||||||
| 128 | /// deprecated because this constructor is prone to ambiguity with the | ||||||||
| 129 | /// APInt(unsigned, uint64_t, bool) constructor. | ||||||||
| 130 | /// | ||||||||
| 131 | /// If this overload is ever deleted, care should be taken to prevent calls | ||||||||
| 132 | /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool) | ||||||||
| 133 | /// constructor. | ||||||||
| 134 | APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]); | ||||||||
| 135 | |||||||||
| 136 | /// Construct an APInt from a string representation. | ||||||||
| 137 | /// | ||||||||
| 138 | /// This constructor interprets the string \p str in the given radix. The | ||||||||
| 139 | /// interpretation stops when the first character that is not suitable for the | ||||||||
| 140 | /// radix is encountered, or the end of the string. Acceptable radix values | ||||||||
| 141 | /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the | ||||||||
| 142 | /// string to require more bits than numBits. | ||||||||
| 143 | /// | ||||||||
| 144 | /// \param numBits the bit width of the constructed APInt | ||||||||
| 145 | /// \param str the string to be interpreted | ||||||||
| 146 | /// \param radix the radix to use for the conversion | ||||||||
| 147 | APInt(unsigned numBits, StringRef str, uint8_t radix); | ||||||||
| 148 | |||||||||
| 149 | /// Default constructor that creates an APInt with a 1-bit zero value. | ||||||||
| 150 | explicit APInt() { U.VAL = 0; } | ||||||||
| 151 | |||||||||
| 152 | /// Copy Constructor. | ||||||||
| 153 | APInt(const APInt &that) : BitWidth(that.BitWidth) { | ||||||||
| 154 | if (isSingleWord()) | ||||||||
| 155 | U.VAL = that.U.VAL; | ||||||||
| 156 | else | ||||||||
| 157 | initSlowCase(that); | ||||||||
| 158 | } | ||||||||
| 159 | |||||||||
| 160 | /// Move Constructor. | ||||||||
| 161 | APInt(APInt &&that) : BitWidth(that.BitWidth) { | ||||||||
| 162 | memcpy(&U, &that.U, sizeof(U)); | ||||||||
| 163 | that.BitWidth = 0; | ||||||||
| 164 | } | ||||||||
| 165 | |||||||||
| 166 | /// Destructor. | ||||||||
| 167 | ~APInt() { | ||||||||
| 168 | if (needsCleanup()) | ||||||||
| 169 | delete[] U.pVal; | ||||||||
| 170 | } | ||||||||
| 171 | |||||||||
| 172 | /// @} | ||||||||
| 173 | /// \name Value Generators | ||||||||
| 174 | /// @{ | ||||||||
| 175 | |||||||||
| 176 | /// Get the '0' value for the specified bit-width. | ||||||||
| 177 | static APInt getZero(unsigned numBits) { return APInt(numBits, 0); } | ||||||||
| 178 | |||||||||
| 179 | /// NOTE: This is soft-deprecated. Please use `getZero()` instead. | ||||||||
| 180 | static APInt getNullValue(unsigned numBits) { return getZero(numBits); } | ||||||||
| 181 | |||||||||
| 182 | /// Return an APInt zero bits wide. | ||||||||
| 183 | static APInt getZeroWidth() { return getZero(0); } | ||||||||
| 184 | |||||||||
| 185 | /// Gets maximum unsigned value of APInt for specific bit width. | ||||||||
| 186 | static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); } | ||||||||
| 187 | |||||||||
| 188 | /// Gets maximum signed value of APInt for a specific bit width. | ||||||||
| 189 | static APInt getSignedMaxValue(unsigned numBits) { | ||||||||
| 190 | APInt API = getAllOnes(numBits); | ||||||||
| 191 | API.clearBit(numBits - 1); | ||||||||
| 192 | return API; | ||||||||
| 193 | } | ||||||||
| 194 | |||||||||
| 195 | /// Gets minimum unsigned value of APInt for a specific bit width. | ||||||||
| 196 | static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } | ||||||||
| 197 | |||||||||
| 198 | /// Gets minimum signed value of APInt for a specific bit width. | ||||||||
| 199 | static APInt getSignedMinValue(unsigned numBits) { | ||||||||
| 200 | APInt API(numBits, 0); | ||||||||
| 201 | API.setBit(numBits - 1); | ||||||||
| 202 | return API; | ||||||||
| 203 | } | ||||||||
| 204 | |||||||||
| 205 | /// Get the SignMask for a specific bit width. | ||||||||
| 206 | /// | ||||||||
| 207 | /// This is just a wrapper function of getSignedMinValue(), and it helps code | ||||||||
| 208 | /// readability when we want to get a SignMask. | ||||||||
| 209 | static APInt getSignMask(unsigned BitWidth) { | ||||||||
| 210 | return getSignedMinValue(BitWidth); | ||||||||
| 211 | } | ||||||||
| 212 | |||||||||
| 213 | /// Return an APInt of a specified width with all bits set. | ||||||||
| 214 | static APInt getAllOnes(unsigned numBits) { | ||||||||
| 215 | return APInt(numBits, WORDTYPE_MAX, true); | ||||||||
| 216 | } | ||||||||
| 217 | |||||||||
| 218 | /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead. | ||||||||
| 219 | static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); } | ||||||||
| 220 | |||||||||
| 221 | /// Return an APInt with exactly one bit set in the result. | ||||||||
| 222 | static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { | ||||||||
| 223 | APInt Res(numBits, 0); | ||||||||
| 224 | Res.setBit(BitNo); | ||||||||
| 225 | return Res; | ||||||||
| 226 | } | ||||||||
| 227 | |||||||||
| 228 | /// Get a value with a block of bits set. | ||||||||
| 229 | /// | ||||||||
| 230 | /// Constructs an APInt value that has a contiguous range of bits set. The | ||||||||
| 231 | /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other | ||||||||
| 232 | /// bits will be zero. For example, with parameters(32, 0, 16) you would get | ||||||||
| 233 | /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than | ||||||||
| 234 | /// \p hiBit. | ||||||||
| 235 | /// | ||||||||
| 236 | /// \param numBits the intended bit width of the result | ||||||||
| 237 | /// \param loBit the index of the lowest bit set. | ||||||||
| 238 | /// \param hiBit the index of the highest bit set. | ||||||||
| 239 | /// | ||||||||
| 240 | /// \returns An APInt value with the requested bits set. | ||||||||
| 241 | static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { | ||||||||
| 242 | APInt Res(numBits, 0); | ||||||||
| 243 | Res.setBits(loBit, hiBit); | ||||||||
| 244 | return Res; | ||||||||
| 245 | } | ||||||||
| 246 | |||||||||
| 247 | /// Wrap version of getBitsSet. | ||||||||
| 248 | /// If \p hiBit is bigger than \p loBit, this is the same as getBitsSet. | ||||||||
| 249 | /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, | ||||||||
| 250 | /// with parameters (32, 28, 4), you would get 0xF000000F. | ||||||||
| 251 | /// If \p hiBit is equal to \p loBit, you would get a result with all bits | ||||||||
| 252 | /// set. | ||||||||
| 253 | static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, | ||||||||
| 254 | unsigned hiBit) { | ||||||||
| 255 | APInt Res(numBits, 0); | ||||||||
| 256 | Res.setBitsWithWrap(loBit, hiBit); | ||||||||
| 257 | return Res; | ||||||||
| 258 | } | ||||||||
| 259 | |||||||||
| 260 | /// Constructs an APInt value that has a contiguous range of bits set. The | ||||||||
| 261 | /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other | ||||||||
| 262 | /// bits will be zero. For example, with parameters(32, 12) you would get | ||||||||
| 263 | /// 0xFFFFF000. | ||||||||
| 264 | /// | ||||||||
| 265 | /// \param numBits the intended bit width of the result | ||||||||
| 266 | /// \param loBit the index of the lowest bit to set. | ||||||||
| 267 | /// | ||||||||
| 268 | /// \returns An APInt value with the requested bits set. | ||||||||
| 269 | static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { | ||||||||
| 270 | APInt Res(numBits, 0); | ||||||||
| 271 | Res.setBitsFrom(loBit); | ||||||||
| 272 | return Res; | ||||||||
| 273 | } | ||||||||
| 274 | |||||||||
| 275 | /// Constructs an APInt value that has the top hiBitsSet bits set. | ||||||||
| 276 | /// | ||||||||
| 277 | /// \param numBits the bitwidth of the result | ||||||||
| 278 | /// \param hiBitsSet the number of high-order bits set in the result. | ||||||||
| 279 | static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { | ||||||||
| 280 | APInt Res(numBits, 0); | ||||||||
| 281 | Res.setHighBits(hiBitsSet); | ||||||||
| 282 | return Res; | ||||||||
| 283 | } | ||||||||
| 284 | |||||||||
| 285 | /// Constructs an APInt value that has the bottom loBitsSet bits set. | ||||||||
| 286 | /// | ||||||||
| 287 | /// \param numBits the bitwidth of the result | ||||||||
| 288 | /// \param loBitsSet the number of low-order bits set in the result. | ||||||||
| 289 | static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { | ||||||||
| 290 | APInt Res(numBits, 0); | ||||||||
| 291 | Res.setLowBits(loBitsSet); | ||||||||
| 292 | return Res; | ||||||||
| 293 | } | ||||||||
| 294 | |||||||||
| 295 | /// Return a value containing V broadcasted over NewLen bits. | ||||||||
| 296 | static APInt getSplat(unsigned NewLen, const APInt &V); | ||||||||
| 297 | |||||||||
| 298 | /// @} | ||||||||
| 299 | /// \name Value Tests | ||||||||
| 300 | /// @{ | ||||||||
| 301 | |||||||||
| 302 | /// Determine if this APInt just has one word to store value. | ||||||||
| 303 | /// | ||||||||
| 304 | /// \returns true if the number of bits <= 64, false otherwise. | ||||||||
| 305 | bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; } | ||||||||
| 306 | |||||||||
| 307 | /// Determine sign of this APInt. | ||||||||
| 308 | /// | ||||||||
| 309 | /// This tests the high bit of this APInt to determine if it is set. | ||||||||
| 310 | /// | ||||||||
| 311 | /// \returns true if this APInt is negative, false otherwise | ||||||||
| 312 | bool isNegative() const { return (*this)[BitWidth - 1]; } | ||||||||
| 313 | |||||||||
| 314 | /// Determine if this APInt Value is non-negative (>= 0) | ||||||||
| 315 | /// | ||||||||
| 316 | /// This tests the high bit of the APInt to determine if it is unset. | ||||||||
| 317 | bool isNonNegative() const { return !isNegative(); } | ||||||||
| 318 | |||||||||
| 319 | /// Determine if sign bit of this APInt is set. | ||||||||
| 320 | /// | ||||||||
| 321 | /// This tests the high bit of this APInt to determine if it is set. | ||||||||
| 322 | /// | ||||||||
| 323 | /// \returns true if this APInt has its sign bit set, false otherwise. | ||||||||
| 324 | bool isSignBitSet() const { return (*this)[BitWidth - 1]; } | ||||||||
| 325 | |||||||||
| 326 | /// Determine if sign bit of this APInt is clear. | ||||||||
| 327 | /// | ||||||||
| 328 | /// This tests the high bit of this APInt to determine if it is clear. | ||||||||
| 329 | /// | ||||||||
| 330 | /// \returns true if this APInt has its sign bit clear, false otherwise. | ||||||||
| 331 | bool isSignBitClear() const { return !isSignBitSet(); } | ||||||||
| 332 | |||||||||
| 333 | /// Determine if this APInt Value is positive. | ||||||||
| 334 | /// | ||||||||
| 335 | /// This tests if the value of this APInt is positive (> 0). Note | ||||||||
| 336 | /// that 0 is not a positive value. | ||||||||
| 337 | /// | ||||||||
| 338 | /// \returns true if this APInt is positive. | ||||||||
| 339 | bool isStrictlyPositive() const { return isNonNegative() && !isZero(); } | ||||||||
| 340 | |||||||||
| 341 | /// Determine if this APInt Value is non-positive (<= 0). | ||||||||
| 342 | /// | ||||||||
| 343 | /// \returns true if this APInt is non-positive. | ||||||||
| 344 | bool isNonPositive() const { return !isStrictlyPositive(); } | ||||||||
| 345 | |||||||||
| 346 | /// Determine if this APInt Value only has the specified bit set. | ||||||||
| 347 | /// | ||||||||
| 348 | /// \returns true if this APInt only has the specified bit set. | ||||||||
| 349 | bool isOneBitSet(unsigned BitNo) const { | ||||||||
| 350 | return (*this)[BitNo] && popcount() == 1; | ||||||||
| 351 | } | ||||||||
| 352 | |||||||||
| 353 | /// Determine if all bits are set. This is true for zero-width values. | ||||||||
| 354 | bool isAllOnes() const { | ||||||||
| 355 | if (BitWidth == 0) | ||||||||
| 356 | return true; | ||||||||
| 357 | if (isSingleWord()) | ||||||||
| 358 | return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth); | ||||||||
| 359 | return countTrailingOnesSlowCase() == BitWidth; | ||||||||
| 360 | } | ||||||||
| 361 | |||||||||
| 362 | /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead. | ||||||||
| 363 | bool isAllOnesValue() const { return isAllOnes(); } | ||||||||
| 364 | |||||||||
| 365 | /// Determine if this value is zero, i.e. all bits are clear. | ||||||||
| 366 | bool isZero() const { | ||||||||
| 367 | if (isSingleWord()) | ||||||||
| 368 | return U.VAL == 0; | ||||||||
| 369 | return countLeadingZerosSlowCase() == BitWidth; | ||||||||
| 370 | } | ||||||||
| 371 | |||||||||
| 372 | /// NOTE: This is soft-deprecated. Please use `isZero()` instead. | ||||||||
| 373 | bool isNullValue() const { return isZero(); } | ||||||||
| 374 | |||||||||
| 375 | /// Determine if this is a value of 1. | ||||||||
| 376 | /// | ||||||||
| 377 | /// This checks to see if the value of this APInt is one. | ||||||||
| 378 | bool isOne() const { | ||||||||
| 379 | if (isSingleWord()) | ||||||||
| 380 | return U.VAL == 1; | ||||||||
| 381 | return countLeadingZerosSlowCase() == BitWidth - 1; | ||||||||
| 382 | } | ||||||||
| 383 | |||||||||
| 384 | /// NOTE: This is soft-deprecated. Please use `isOne()` instead. | ||||||||
| 385 | bool isOneValue() const { return isOne(); } | ||||||||
| 386 | |||||||||
| 387 | /// Determine if this is the largest unsigned value. | ||||||||
| 388 | /// | ||||||||
| 389 | /// This checks to see if the value of this APInt is the maximum unsigned | ||||||||
| 390 | /// value for the APInt's bit width. | ||||||||
| 391 | bool isMaxValue() const { return isAllOnes(); } | ||||||||
| 392 | |||||||||
| 393 | /// Determine if this is the largest signed value. | ||||||||
| 394 | /// | ||||||||
| 395 | /// This checks to see if the value of this APInt is the maximum signed | ||||||||
| 396 | /// value for the APInt's bit width. | ||||||||
| 397 | bool isMaxSignedValue() const { | ||||||||
| 398 | if (isSingleWord()) { | ||||||||
| 399 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 399, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 400 | return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1); | ||||||||
| 401 | } | ||||||||
| 402 | return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1; | ||||||||
| 403 | } | ||||||||
| 404 | |||||||||
| 405 | /// Determine if this is the smallest unsigned value. | ||||||||
| 406 | /// | ||||||||
| 407 | /// This checks to see if the value of this APInt is the minimum unsigned | ||||||||
| 408 | /// value for the APInt's bit width. | ||||||||
| 409 | bool isMinValue() const { return isZero(); } | ||||||||
| 410 | |||||||||
| 411 | /// Determine if this is the smallest signed value. | ||||||||
| 412 | /// | ||||||||
| 413 | /// This checks to see if the value of this APInt is the minimum signed | ||||||||
| 414 | /// value for the APInt's bit width. | ||||||||
| 415 | bool isMinSignedValue() const { | ||||||||
| 416 | if (isSingleWord()) { | ||||||||
| 417 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 417, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 418 | return U.VAL == (WordType(1) << (BitWidth - 1)); | ||||||||
| 419 | } | ||||||||
| 420 | return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1; | ||||||||
| 421 | } | ||||||||
| 422 | |||||||||
| 423 | /// Check if this APInt has an N-bits unsigned integer value. | ||||||||
| 424 | bool isIntN(unsigned N) const { return getActiveBits() <= N; } | ||||||||
| 425 | |||||||||
| 426 | /// Check if this APInt has an N-bits signed integer value. | ||||||||
| 427 | bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; } | ||||||||
| 428 | |||||||||
| 429 | /// Check if this APInt's value is a power of two greater than zero. | ||||||||
| 430 | /// | ||||||||
| 431 | /// \returns true if the argument APInt value is a power of two > 0. | ||||||||
| 432 | bool isPowerOf2() const { | ||||||||
| 433 | if (isSingleWord()) { | ||||||||
| 434 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 434, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 435 | return isPowerOf2_64(U.VAL); | ||||||||
| 436 | } | ||||||||
| 437 | return countPopulationSlowCase() == 1; | ||||||||
| 438 | } | ||||||||
| 439 | |||||||||
| 440 | /// Check if this APInt's negated value is a power of two greater than zero. | ||||||||
| 441 | bool isNegatedPowerOf2() const { | ||||||||
| 442 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 442, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 443 | if (isNonNegative()) | ||||||||
| 444 | return false; | ||||||||
| 445 | // NegatedPowerOf2 - shifted mask in the top bits. | ||||||||
| 446 | unsigned LO = countl_one(); | ||||||||
| 447 | unsigned TZ = countr_zero(); | ||||||||
| 448 | return (LO + TZ) == BitWidth; | ||||||||
| 449 | } | ||||||||
| 450 | |||||||||
| 451 | /// Check if the APInt's value is returned by getSignMask. | ||||||||
| 452 | /// | ||||||||
| 453 | /// \returns true if this is the value returned by getSignMask. | ||||||||
| 454 | bool isSignMask() const { return isMinSignedValue(); } | ||||||||
| 455 | |||||||||
| 456 | /// Convert APInt to a boolean value. | ||||||||
| 457 | /// | ||||||||
| 458 | /// This converts the APInt to a boolean value as a test against zero. | ||||||||
| 459 | bool getBoolValue() const { return !isZero(); } | ||||||||
| 460 | |||||||||
| 461 | /// If this value is smaller than the specified limit, return it, otherwise | ||||||||
| 462 | /// return the limit value. This causes the value to saturate to the limit. | ||||||||
| 463 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const { | ||||||||
| 464 | return ugt(Limit) ? Limit : getZExtValue(); | ||||||||
| 465 | } | ||||||||
| 466 | |||||||||
| 467 | /// Check if the APInt consists of a repeated bit pattern. | ||||||||
| 468 | /// | ||||||||
| 469 | /// e.g. 0x01010101 satisfies isSplat(8). | ||||||||
| 470 | /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit | ||||||||
| 471 | /// width without remainder. | ||||||||
| 472 | bool isSplat(unsigned SplatSizeInBits) const; | ||||||||
| 473 | |||||||||
| 474 | /// \returns true if this APInt value is a sequence of \p numBits ones | ||||||||
| 475 | /// starting at the least significant bit with the remainder zero. | ||||||||
| 476 | bool isMask(unsigned numBits) const { | ||||||||
| 477 | assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero" ) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\"" , "llvm/include/llvm/ADT/APInt.h", 477, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 478 | assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range" ) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\"" , "llvm/include/llvm/ADT/APInt.h", 478, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 479 | if (isSingleWord()) | ||||||||
| 480 | return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits)); | ||||||||
| 481 | unsigned Ones = countTrailingOnesSlowCase(); | ||||||||
| 482 | return (numBits == Ones) && | ||||||||
| 483 | ((Ones + countLeadingZerosSlowCase()) == BitWidth); | ||||||||
| 484 | } | ||||||||
| 485 | |||||||||
| 486 | /// \returns true if this APInt is a non-empty sequence of ones starting at | ||||||||
| 487 | /// the least significant bit with the remainder zero. | ||||||||
| 488 | /// Ex. isMask(0x0000FFFFU) == true. | ||||||||
| 489 | bool isMask() const { | ||||||||
| 490 | if (isSingleWord()) | ||||||||
| 491 | return isMask_64(U.VAL); | ||||||||
| 492 | unsigned Ones = countTrailingOnesSlowCase(); | ||||||||
| 493 | return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); | ||||||||
| 494 | } | ||||||||
| 495 | |||||||||
| 496 | /// Return true if this APInt value contains a non-empty sequence of ones with | ||||||||
| 497 | /// the remainder zero. | ||||||||
| 498 | bool isShiftedMask() const { | ||||||||
| 499 | if (isSingleWord()) | ||||||||
| 500 | return isShiftedMask_64(U.VAL); | ||||||||
| 501 | unsigned Ones = countPopulationSlowCase(); | ||||||||
| 502 | unsigned LeadZ = countLeadingZerosSlowCase(); | ||||||||
| 503 | return (Ones + LeadZ + countr_zero()) == BitWidth; | ||||||||
| 504 | } | ||||||||
| 505 | |||||||||
| 506 | /// Return true if this APInt value contains a non-empty sequence of ones with | ||||||||
| 507 | /// the remainder zero. If true, \p MaskIdx will specify the index of the | ||||||||
| 508 | /// lowest set bit and \p MaskLen is updated to specify the length of the | ||||||||
| 509 | /// mask, else neither are updated. | ||||||||
| 510 | bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const { | ||||||||
| 511 | if (isSingleWord()) | ||||||||
| 512 | return isShiftedMask_64(U.VAL, MaskIdx, MaskLen); | ||||||||
| 513 | unsigned Ones = countPopulationSlowCase(); | ||||||||
| 514 | unsigned LeadZ = countLeadingZerosSlowCase(); | ||||||||
| 515 | unsigned TrailZ = countTrailingZerosSlowCase(); | ||||||||
| 516 | if ((Ones + LeadZ + TrailZ) != BitWidth) | ||||||||
| 517 | return false; | ||||||||
| 518 | MaskLen = Ones; | ||||||||
| 519 | MaskIdx = TrailZ; | ||||||||
| 520 | return true; | ||||||||
| 521 | } | ||||||||
| 522 | |||||||||
| 523 | /// Compute an APInt containing numBits highbits from this APInt. | ||||||||
| 524 | /// | ||||||||
| 525 | /// Get an APInt with the same BitWidth as this APInt, just zero mask the low | ||||||||
| 526 | /// bits and right shift to the least significant bit. | ||||||||
| 527 | /// | ||||||||
| 528 | /// \returns the high "numBits" bits of this APInt. | ||||||||
| 529 | APInt getHiBits(unsigned numBits) const; | ||||||||
| 530 | |||||||||
| 531 | /// Compute an APInt containing numBits lowbits from this APInt. | ||||||||
| 532 | /// | ||||||||
| 533 | /// Get an APInt with the same BitWidth as this APInt, just zero mask the high | ||||||||
| 534 | /// bits. | ||||||||
| 535 | /// | ||||||||
| 536 | /// \returns the low "numBits" bits of this APInt. | ||||||||
| 537 | APInt getLoBits(unsigned numBits) const; | ||||||||
| 538 | |||||||||
| 539 | /// Determine if two APInts have the same value, after zero-extending | ||||||||
| 540 | /// one of them (if needed!) to ensure that the bit-widths match. | ||||||||
| 541 | static bool isSameValue(const APInt &I1, const APInt &I2) { | ||||||||
| 542 | if (I1.getBitWidth() == I2.getBitWidth()) | ||||||||
| 543 | return I1 == I2; | ||||||||
| 544 | |||||||||
| 545 | if (I1.getBitWidth() > I2.getBitWidth()) | ||||||||
| 546 | return I1 == I2.zext(I1.getBitWidth()); | ||||||||
| 547 | |||||||||
| 548 | return I1.zext(I2.getBitWidth()) == I2; | ||||||||
| 549 | } | ||||||||
| 550 | |||||||||
| 551 | /// Overload to compute a hash_code for an APInt value. | ||||||||
| 552 | friend hash_code hash_value(const APInt &Arg); | ||||||||
| 553 | |||||||||
| 554 | /// This function returns a pointer to the internal storage of the APInt. | ||||||||
| 555 | /// This is useful for writing out the APInt in binary form without any | ||||||||
| 556 | /// conversions. | ||||||||
| 557 | const uint64_t *getRawData() const { | ||||||||
| 558 | if (isSingleWord()) | ||||||||
| 559 | return &U.VAL; | ||||||||
| 560 | return &U.pVal[0]; | ||||||||
| 561 | } | ||||||||
| 562 | |||||||||
| 563 | /// @} | ||||||||
| 564 | /// \name Unary Operators | ||||||||
| 565 | /// @{ | ||||||||
| 566 | |||||||||
| 567 | /// Postfix increment operator. Increment *this by 1. | ||||||||
| 568 | /// | ||||||||
| 569 | /// \returns a new APInt value representing the original value of *this. | ||||||||
| 570 | APInt operator++(int) { | ||||||||
| 571 | APInt API(*this); | ||||||||
| 572 | ++(*this); | ||||||||
| 573 | return API; | ||||||||
| 574 | } | ||||||||
| 575 | |||||||||
| 576 | /// Prefix increment operator. | ||||||||
| 577 | /// | ||||||||
| 578 | /// \returns *this incremented by one | ||||||||
| 579 | APInt &operator++(); | ||||||||
| 580 | |||||||||
| 581 | /// Postfix decrement operator. Decrement *this by 1. | ||||||||
| 582 | /// | ||||||||
| 583 | /// \returns a new APInt value representing the original value of *this. | ||||||||
| 584 | APInt operator--(int) { | ||||||||
| 585 | APInt API(*this); | ||||||||
| 586 | --(*this); | ||||||||
| 587 | return API; | ||||||||
| 588 | } | ||||||||
| 589 | |||||||||
| 590 | /// Prefix decrement operator. | ||||||||
| 591 | /// | ||||||||
| 592 | /// \returns *this decremented by one. | ||||||||
| 593 | APInt &operator--(); | ||||||||
| 594 | |||||||||
| 595 | /// Logical negation operation on this APInt returns true if zero, like normal | ||||||||
| 596 | /// integers. | ||||||||
| 597 | bool operator!() const { return isZero(); } | ||||||||
| 598 | |||||||||
| 599 | /// @} | ||||||||
| 600 | /// \name Assignment Operators | ||||||||
| 601 | /// @{ | ||||||||
| 602 | |||||||||
| 603 | /// Copy assignment operator. | ||||||||
| 604 | /// | ||||||||
| 605 | /// \returns *this after assignment of RHS. | ||||||||
| 606 | APInt &operator=(const APInt &RHS) { | ||||||||
| 607 | // The common case (both source or dest being inline) doesn't require | ||||||||
| 608 | // allocation or deallocation. | ||||||||
| 609 | if (isSingleWord() && RHS.isSingleWord()) { | ||||||||
| 610 | U.VAL = RHS.U.VAL; | ||||||||
| 611 | BitWidth = RHS.BitWidth; | ||||||||
| 612 | return *this; | ||||||||
| 613 | } | ||||||||
| 614 | |||||||||
| 615 | assignSlowCase(RHS); | ||||||||
| 616 | return *this; | ||||||||
| 617 | } | ||||||||
| 618 | |||||||||
| 619 | /// Move assignment operator. | ||||||||
| 620 | APInt &operator=(APInt &&that) { | ||||||||
| 621 | #ifdef EXPENSIVE_CHECKS | ||||||||
| 622 | // Some std::shuffle implementations still do self-assignment. | ||||||||
| 623 | if (this == &that) | ||||||||
| 624 | return *this; | ||||||||
| 625 | #endif | ||||||||
| 626 | assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported" ) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\"" , "llvm/include/llvm/ADT/APInt.h", 626, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 627 | if (!isSingleWord()) | ||||||||
| 628 | delete[] U.pVal; | ||||||||
| 629 | |||||||||
| 630 | // Use memcpy so that type based alias analysis sees both VAL and pVal | ||||||||
| 631 | // as modified. | ||||||||
| 632 | memcpy(&U, &that.U, sizeof(U)); | ||||||||
| 633 | |||||||||
| 634 | BitWidth = that.BitWidth; | ||||||||
| 635 | that.BitWidth = 0; | ||||||||
| 636 | return *this; | ||||||||
| 637 | } | ||||||||
| 638 | |||||||||
| 639 | /// Assignment operator. | ||||||||
| 640 | /// | ||||||||
| 641 | /// The RHS value is assigned to *this. If the significant bits in RHS exceed | ||||||||
| 642 | /// the bit width, the excess bits are truncated. If the bit width is larger | ||||||||
| 643 | /// than 64, the value is zero filled in the unspecified high order bits. | ||||||||
| 644 | /// | ||||||||
| 645 | /// \returns *this after assignment of RHS value. | ||||||||
| 646 | APInt &operator=(uint64_t RHS) { | ||||||||
| 647 | if (isSingleWord()) { | ||||||||
| 648 | U.VAL = RHS; | ||||||||
| 649 | return clearUnusedBits(); | ||||||||
| 650 | } | ||||||||
| 651 | U.pVal[0] = RHS; | ||||||||
| 652 | memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | ||||||||
| 653 | return *this; | ||||||||
| 654 | } | ||||||||
| 655 | |||||||||
| 656 | /// Bitwise AND assignment operator. | ||||||||
| 657 | /// | ||||||||
| 658 | /// Performs a bitwise AND operation on this APInt and RHS. The result is | ||||||||
| 659 | /// assigned to *this. | ||||||||
| 660 | /// | ||||||||
| 661 | /// \returns *this after ANDing with RHS. | ||||||||
| 662 | APInt &operator&=(const APInt &RHS) { | ||||||||
| 663 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 663, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 664 | if (isSingleWord()) | ||||||||
| 665 | U.VAL &= RHS.U.VAL; | ||||||||
| 666 | else | ||||||||
| 667 | andAssignSlowCase(RHS); | ||||||||
| 668 | return *this; | ||||||||
| 669 | } | ||||||||
| 670 | |||||||||
| 671 | /// Bitwise AND assignment operator taking a 64-bit unsigned value. | ||||||||
| 672 | /// | ||||||||
| 673 | /// Performs a bitwise AND operation on this APInt and RHS. RHS is | ||||||||
| 674 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
| 675 | /// the LHS; in the multi-word case every word above the lowest is therefore zeroed. \returns *this. | ||||||||
| 676 | APInt &operator&=(uint64_t RHS) { | ||||||||
| 677 | if (isSingleWord()) { | ||||||||
| 678 | U.VAL &= RHS; | ||||||||
| 679 | return *this; | ||||||||
| 680 | } | ||||||||
| 681 | U.pVal[0] &= RHS; | ||||||||
| 682 | memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | ||||||||
| 683 | return *this; | ||||||||
| 684 | } | ||||||||
| 685 | |||||||||
| 686 | /// Bitwise OR assignment operator. | ||||||||
| 687 | /// | ||||||||
| 688 | /// Performs a bitwise OR operation on this APInt and RHS. The result is | ||||||||
| 689 | /// assigned to *this. Both operands must have the same bit width (asserted); multi-word values are handled by orAssignSlowCase(). | ||||||||
| 690 | /// | ||||||||
| 691 | /// \returns *this after ORing with RHS. | ||||||||
| 692 | APInt &operator|=(const APInt &RHS) { | ||||||||
| 693 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 693, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 694 | if (isSingleWord()) | ||||||||
| 695 | U.VAL |= RHS.U.VAL; | ||||||||
| 696 | else | ||||||||
| 697 | orAssignSlowCase(RHS); | ||||||||
| 698 | return *this; | ||||||||
| 699 | } | ||||||||
| 700 | |||||||||
| 701 | /// Bitwise OR assignment operator taking a 64-bit unsigned value. | ||||||||
| 702 | /// | ||||||||
| 703 | /// Performs a bitwise OR operation on this APInt and RHS. RHS is | ||||||||
| 704 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
| 705 | /// the LHS: the single-word path calls clearUnusedBits() after the OR, and the multi-word path ORs RHS into the lowest word only. \returns *this. | ||||||||
| 706 | APInt &operator|=(uint64_t RHS) { | ||||||||
| 707 | if (isSingleWord()) { | ||||||||
| 708 | U.VAL |= RHS; | ||||||||
| 709 | return clearUnusedBits(); | ||||||||
| 710 | } | ||||||||
| 711 | U.pVal[0] |= RHS; | ||||||||
| 712 | return *this; | ||||||||
| 713 | } | ||||||||
| 714 | |||||||||
| 715 | /// Bitwise XOR assignment operator. | ||||||||
| 716 | /// | ||||||||
| 717 | /// Performs a bitwise XOR operation on this APInt and RHS. The result is | ||||||||
| 718 | /// assigned to *this. Both operands must have the same bit width (asserted); multi-word values are handled by xorAssignSlowCase(). | ||||||||
| 719 | /// | ||||||||
| 720 | /// \returns *this after XORing with RHS. | ||||||||
| 721 | APInt &operator^=(const APInt &RHS) { | ||||||||
| 722 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 722, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 723 | if (isSingleWord()) | ||||||||
| 724 | U.VAL ^= RHS.U.VAL; | ||||||||
| 725 | else | ||||||||
| 726 | xorAssignSlowCase(RHS); | ||||||||
| 727 | return *this; | ||||||||
| 728 | } | ||||||||
| 729 | |||||||||
| 730 | /// Bitwise XOR assignment operator taking a 64-bit unsigned value. | ||||||||
| 731 | /// | ||||||||
| 732 | /// Performs a bitwise XOR operation on this APInt and RHS. RHS is | ||||||||
| 733 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
| 734 | /// the LHS: the single-word path calls clearUnusedBits() after the XOR, and the multi-word path XORs RHS into the lowest word only. \returns *this. | ||||||||
| 735 | APInt &operator^=(uint64_t RHS) { | ||||||||
| 736 | if (isSingleWord()) { | ||||||||
| 737 | U.VAL ^= RHS; | ||||||||
| 738 | return clearUnusedBits(); | ||||||||
| 739 | } | ||||||||
| 740 | U.pVal[0] ^= RHS; | ||||||||
| 741 | return *this; | ||||||||
| 742 | } | ||||||||
| 743 | |||||||||
| 744 | /// Multiplication assignment operator. | ||||||||
| 745 | /// | ||||||||
| 746 | /// Multiplies this APInt by RHS and assigns the result to *this. | ||||||||
| 747 | /// | ||||||||
| 748 | /// \returns *this | ||||||||
| 749 | APInt &operator*=(const APInt &RHS); | ||||||||
| 750 | APInt &operator*=(uint64_t RHS); | ||||||||
| 751 | |||||||||
| 752 | /// Addition assignment operator. | ||||||||
| 753 | /// | ||||||||
| 754 | /// Adds RHS to *this and assigns the result to *this. | ||||||||
| 755 | /// | ||||||||
| 756 | /// \returns *this | ||||||||
| 757 | APInt &operator+=(const APInt &RHS); | ||||||||
| 758 | APInt &operator+=(uint64_t RHS); | ||||||||
| 759 | |||||||||
| 760 | /// Subtraction assignment operator. | ||||||||
| 761 | /// | ||||||||
| 762 | /// Subtracts RHS from *this and assigns the result to *this. | ||||||||
| 763 | /// | ||||||||
| 764 | /// \returns *this | ||||||||
| 765 | APInt &operator-=(const APInt &RHS); | ||||||||
| 766 | APInt &operator-=(uint64_t RHS); | ||||||||
| 767 | |||||||||
| 768 | /// Left-shift assignment operator. | ||||||||
| 769 | /// | ||||||||
| 770 | /// Shifts *this left by ShiftAmt and assigns the result to *this. ShiftAmt must not exceed the bit width (asserted). | ||||||||
| 771 | /// A single-word shift by exactly BitWidth is special-cased to produce 0 instead of shifting U.VAL by that amount; multi-word values use shlSlowCase(). | ||||||||
| 772 | /// \returns *this after shifting left by ShiftAmt | ||||||||
| 773 | APInt &operator<<=(unsigned ShiftAmt) { | ||||||||
| 774 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "llvm/include/llvm/ADT/APInt.h", 774, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 775 | if (isSingleWord()) { | ||||||||
| 776 | if (ShiftAmt == BitWidth) | ||||||||
| 777 | U.VAL = 0; | ||||||||
| 778 | else | ||||||||
| 779 | U.VAL <<= ShiftAmt; | ||||||||
| 780 | return clearUnusedBits(); | ||||||||
| 781 | } | ||||||||
| 782 | shlSlowCase(ShiftAmt); | ||||||||
| 783 | return *this; | ||||||||
| 784 | } | ||||||||
| 785 | |||||||||
| 786 | /// Left-shift assignment function. | ||||||||
| 787 | /// | ||||||||
| 788 | /// Shifts *this left by shiftAmt and assigns the result to *this. | ||||||||
| 789 | /// | ||||||||
| 790 | /// \returns *this after shifting left by ShiftAmt | ||||||||
| 791 | APInt &operator<<=(const APInt &ShiftAmt); | ||||||||
| 792 | |||||||||
| 793 | /// @} | ||||||||
| 794 | /// \name Binary Operators | ||||||||
| 795 | /// @{ | ||||||||
| 796 | |||||||||
| 797 | /// Multiplication operator. | ||||||||
| 798 | /// | ||||||||
| 799 | /// Multiplies this APInt by RHS and returns the result. | ||||||||
| 800 | APInt operator*(const APInt &RHS) const; | ||||||||
| 801 | |||||||||
| 802 | /// Left logical shift operator (shift amount given as unsigned). | ||||||||
| 803 | /// | ||||||||
| 804 | /// Shifts this APInt left by \p Bits and returns the result; *this is not modified. Forwards to shl(Bits). | ||||||||
| 805 | APInt operator<<(unsigned Bits) const { return shl(Bits); } | ||||||||
| 806 | |||||||||
| 807 | /// Left logical shift operator (shift amount given as an APInt). | ||||||||
| 808 | /// | ||||||||
| 809 | /// Shifts this APInt left by \p Bits and returns the result; *this is not modified. Forwards to shl(Bits). | ||||||||
| 810 | APInt operator<<(const APInt &Bits) const { return shl(Bits); } | ||||||||
| 811 | |||||||||
| 812 | /// Arithmetic right-shift function. | ||||||||
| 813 | /// | ||||||||
| 814 | /// \returns a copy of this APInt arithmetically shifted right by ShiftAmt (via ashrInPlace on the copy); *this is not modified. | ||||||||
| 815 | APInt ashr(unsigned ShiftAmt) const { | ||||||||
| 816 | APInt R(*this); | ||||||||
| 817 | R.ashrInPlace(ShiftAmt); | ||||||||
| 818 | return R; | ||||||||
| 819 | } | ||||||||
| 820 | |||||||||
| 821 | /// Arithmetic right-shift this APInt by ShiftAmt in place. ShiftAmt must not exceed the bit width (asserted). In the single-word case the value is sign-extended to 64 bits before shifting, and a shift by exactly BitWidth fills the value with the sign bit; multi-word values use ashrSlowCase(). | ||||||||
| 822 | void ashrInPlace(unsigned ShiftAmt) { | ||||||||
| 823 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "llvm/include/llvm/ADT/APInt.h", 823, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 824 | if (isSingleWord()) { | ||||||||
| 825 | int64_t SExtVAL = SignExtend64(U.VAL, BitWidth); | ||||||||
| 826 | if (ShiftAmt == BitWidth) | ||||||||
| 827 | U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. | ||||||||
| 828 | else | ||||||||
| 829 | U.VAL = SExtVAL >> ShiftAmt; | ||||||||
| 830 | clearUnusedBits(); | ||||||||
| 831 | return; | ||||||||
| 832 | } | ||||||||
| 833 | ashrSlowCase(ShiftAmt); | ||||||||
| 834 | } | ||||||||
| 835 | |||||||||
| 836 | /// Logical right-shift function. | ||||||||
| 837 | /// | ||||||||
| 838 | /// \returns a copy of this APInt logically shifted right by shiftAmt (via lshrInPlace on the copy); *this is not modified. | ||||||||
| 839 | APInt lshr(unsigned shiftAmt) const { | ||||||||
| 840 | APInt R(*this); | ||||||||
| 841 | R.lshrInPlace(shiftAmt); | ||||||||
| 842 | return R; | ||||||||
| 843 | } | ||||||||
| 844 | |||||||||
| 845 | /// Logical right-shift this APInt by ShiftAmt in place. ShiftAmt must not exceed the bit width (asserted). A single-word shift by exactly BitWidth is special-cased to produce 0 instead of shifting U.VAL by that amount; multi-word values use lshrSlowCase(). | ||||||||
| 846 | void lshrInPlace(unsigned ShiftAmt) { | ||||||||
| 847 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "llvm/include/llvm/ADT/APInt.h", 847, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 848 | if (isSingleWord()) { | ||||||||
| 849 | if (ShiftAmt == BitWidth) | ||||||||
| 850 | U.VAL = 0; | ||||||||
| 851 | else | ||||||||
| 852 | U.VAL >>= ShiftAmt; | ||||||||
| 853 | return; | ||||||||
| 854 | } | ||||||||
| 855 | lshrSlowCase(ShiftAmt); | ||||||||
| 856 | } | ||||||||
| 857 | |||||||||
| 858 | /// Left-shift function. | ||||||||
| 859 | /// | ||||||||
| 860 | /// \returns a copy of this APInt shifted left by shiftAmt (via operator<<= on the copy); *this is not modified. | ||||||||
| 861 | APInt shl(unsigned shiftAmt) const { | ||||||||
| 862 | APInt R(*this); | ||||||||
| 863 | R <<= shiftAmt; | ||||||||
| 864 | return R; | ||||||||
| 865 | } | ||||||||
| 866 | |||||||||
| 867 | /// Relative logical shift right: a positive RelativeShift shifts right logically by that amount; a zero or negative RelativeShift shifts left by -RelativeShift. | ||||||||
| 868 | APInt relativeLShr(int RelativeShift) const { | ||||||||
| 869 | return RelativeShift > 0 ? lshr(RelativeShift) : shl(-RelativeShift); | ||||||||
| 870 | } | ||||||||
| 871 | |||||||||
| 872 | /// Relative logical shift left: implemented as relativeLShr() with the shift negated, so a positive RelativeShift shifts left and a negative one shifts right logically. | ||||||||
| 873 | APInt relativeLShl(int RelativeShift) const { | ||||||||
| 874 | return relativeLShr(-RelativeShift); | ||||||||
| 875 | } | ||||||||
| 876 | |||||||||
| 877 | /// Relative arithmetic shift right: a positive RelativeShift shifts right arithmetically by that amount; a zero or negative RelativeShift shifts left by -RelativeShift. | ||||||||
| 878 | APInt relativeAShr(int RelativeShift) const { | ||||||||
| 879 | return RelativeShift > 0 ? ashr(RelativeShift) : shl(-RelativeShift); | ||||||||
| 880 | } | ||||||||
| 881 | |||||||||
| 882 | /// Relative arithmetic shift left: implemented as relativeAShr() with the shift negated, so a positive RelativeShift shifts left and a negative one shifts right arithmetically. | ||||||||
| 883 | APInt relativeAShl(int RelativeShift) const { | ||||||||
| 884 | return relativeAShr(-RelativeShift); | ||||||||
| 885 | } | ||||||||
| 886 | |||||||||
| 887 | /// Rotate left by rotateAmt. | ||||||||
| 888 | APInt rotl(unsigned rotateAmt) const; | ||||||||
| 889 | |||||||||
| 890 | /// Rotate right by rotateAmt. | ||||||||
| 891 | APInt rotr(unsigned rotateAmt) const; | ||||||||
| 892 | |||||||||
| 893 | /// Arithmetic right-shift function (shift amount given as an APInt). | ||||||||
| 894 | /// | ||||||||
| 895 | /// \returns a copy of this APInt arithmetically shifted right by ShiftAmt (via ashrInPlace on the copy); *this is not modified. | ||||||||
| 896 | APInt ashr(const APInt &ShiftAmt) const { | ||||||||
| 897 | APInt R(*this); | ||||||||
| 898 | R.ashrInPlace(ShiftAmt); | ||||||||
| 899 | return R; | ||||||||
| 900 | } | ||||||||
| 901 | |||||||||
| 902 | /// Arithmetic right-shift this APInt by shiftAmt in place. | ||||||||
| 903 | void ashrInPlace(const APInt &shiftAmt); | ||||||||
| 904 | |||||||||
| 905 | /// Logical right-shift function (shift amount given as an APInt). | ||||||||
| 906 | /// | ||||||||
| 907 | /// \returns a copy of this APInt logically shifted right by ShiftAmt (via lshrInPlace on the copy); *this is not modified. | ||||||||
| 908 | APInt lshr(const APInt &ShiftAmt) const { | ||||||||
| 909 | APInt R(*this); | ||||||||
| 910 | R.lshrInPlace(ShiftAmt); | ||||||||
| 911 | return R; | ||||||||
| 912 | } | ||||||||
| 913 | |||||||||
| 914 | /// Logical right-shift this APInt by ShiftAmt in place. | ||||||||
| 915 | void lshrInPlace(const APInt &ShiftAmt); | ||||||||
| 916 | |||||||||
| 917 | /// Left-shift function (shift amount given as an APInt). | ||||||||
| 918 | /// | ||||||||
| 919 | /// \returns a copy of this APInt shifted left by ShiftAmt (via operator<<= on the copy); *this is not modified. | ||||||||
| 920 | APInt shl(const APInt &ShiftAmt) const { | ||||||||
| 921 | APInt R(*this); | ||||||||
| 922 | R <<= ShiftAmt; | ||||||||
| 923 | return R; | ||||||||
| 924 | } | ||||||||
| 925 | |||||||||
| 926 | /// Rotate left by rotateAmt. | ||||||||
| 927 | APInt rotl(const APInt &rotateAmt) const; | ||||||||
| 928 | |||||||||
| 929 | /// Rotate right by rotateAmt. | ||||||||
| 930 | APInt rotr(const APInt &rotateAmt) const; | ||||||||
| 931 | |||||||||
| 932 | /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is | ||||||||
| 933 | /// equivalent to: | ||||||||
| 934 | /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth) | ||||||||
| 935 | APInt concat(const APInt &NewLSB) const { | ||||||||
| 936 | // If the result will be small, then both the merged values are small. On this path NewLSB can only be a full 64 bits wide when *this has width 0; shifting U.VAL by 64 would be undefined, so a zero-width *this contributes 0 explicitly. | ||||||||
| 937 | unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth(); | ||||||||
| 938 | if (NewWidth <= APINT_BITS_PER_WORD) | ||||||||
| 939 | return APInt(NewWidth, (getBitWidth() == 0 ? 0 : U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL); | ||||||||
| 940 | return concatSlowCase(NewLSB); | ||||||||
| 941 | } | ||||||||
| 942 | |||||||||
| 943 | /// Unsigned division operation. | ||||||||
| 944 | /// | ||||||||
| 945 | /// Perform an unsigned divide operation on this APInt by RHS. Both this and | ||||||||
| 946 | /// RHS are treated as unsigned quantities for purposes of this division. | ||||||||
| 947 | /// | ||||||||
| 948 | /// \returns a new APInt value containing the division result, rounded towards | ||||||||
| 949 | /// zero. | ||||||||
| 950 | APInt udiv(const APInt &RHS) const; | ||||||||
| 951 | APInt udiv(uint64_t RHS) const; | ||||||||
| 952 | |||||||||
| 953 | /// Signed division function for APInt. | ||||||||
| 954 | /// | ||||||||
| 955 | /// Signed divide this APInt by APInt RHS. | ||||||||
| 956 | /// | ||||||||
| 957 | /// The result is rounded towards zero. | ||||||||
| 958 | APInt sdiv(const APInt &RHS) const; | ||||||||
| 959 | APInt sdiv(int64_t RHS) const; | ||||||||
| 960 | |||||||||
| 961 | /// Unsigned remainder operation. | ||||||||
| 962 | /// | ||||||||
| 963 | /// Perform an unsigned remainder operation on this APInt with RHS being the | ||||||||
| 964 | /// divisor. Both this and RHS are treated as unsigned quantities for purposes | ||||||||
| 965 | /// of this operation. | ||||||||
| 966 | /// | ||||||||
| 967 | /// \returns a new APInt value containing the remainder result | ||||||||
| 968 | APInt urem(const APInt &RHS) const; | ||||||||
| 969 | uint64_t urem(uint64_t RHS) const; | ||||||||
| 970 | |||||||||
| 971 | /// Function for signed remainder operation. | ||||||||
| 972 | /// | ||||||||
| 973 | /// Signed remainder operation on APInt. | ||||||||
| 974 | /// | ||||||||
| 975 | /// Note that this is a true remainder operation and not a modulo operation | ||||||||
| 976 | /// because the sign follows the sign of the dividend which is *this. | ||||||||
| 977 | APInt srem(const APInt &RHS) const; | ||||||||
| 978 | int64_t srem(int64_t RHS) const; | ||||||||
| 979 | |||||||||
| 980 | /// Dual division/remainder interface. | ||||||||
| 981 | /// | ||||||||
| 982 | /// Sometimes it is convenient to divide two APInt values and obtain both the | ||||||||
| 983 | /// quotient and remainder. This function does both operations in the same | ||||||||
| 984 | /// computation making it a little more efficient. The pair of input arguments | ||||||||
| 985 | /// may overlap with the pair of output arguments. It is safe to call | ||||||||
| 986 | /// udivrem(X, Y, X, Y), for example. | ||||||||
| 987 | static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, | ||||||||
| 988 | APInt &Remainder); | ||||||||
| 989 | static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, | ||||||||
| 990 | uint64_t &Remainder); | ||||||||
| 991 | |||||||||
| 992 | static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, | ||||||||
| 993 | APInt &Remainder); | ||||||||
| 994 | static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient, | ||||||||
| 995 | int64_t &Remainder); | ||||||||
| 996 | |||||||||
| 997 | // Operations that return overflow indicators. | ||||||||
| 998 | APInt sadd_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 999 | APInt uadd_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 1000 | APInt ssub_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 1001 | APInt usub_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 1002 | APInt sdiv_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 1003 | APInt smul_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 1004 | APInt umul_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
| 1005 | APInt sshl_ov(const APInt &Amt, bool &Overflow) const; | ||||||||
| 1006 | APInt ushl_ov(const APInt &Amt, bool &Overflow) const; | ||||||||
| 1007 | |||||||||
| 1008 | // Operations that saturate | ||||||||
| 1009 | APInt sadd_sat(const APInt &RHS) const; | ||||||||
| 1010 | APInt uadd_sat(const APInt &RHS) const; | ||||||||
| 1011 | APInt ssub_sat(const APInt &RHS) const; | ||||||||
| 1012 | APInt usub_sat(const APInt &RHS) const; | ||||||||
| 1013 | APInt smul_sat(const APInt &RHS) const; | ||||||||
| 1014 | APInt umul_sat(const APInt &RHS) const; | ||||||||
| 1015 | APInt sshl_sat(const APInt &RHS) const; | ||||||||
| 1016 | APInt ushl_sat(const APInt &RHS) const; | ||||||||
| 1017 | |||||||||
| 1018 | /// Array-indexing support. bitPosition must be less than the bit width (asserted). | ||||||||
| 1019 | /// | ||||||||
| 1020 | /// \returns the bit value at bitPosition, i.e. whether maskBit(bitPosition) is set in the word that holds that bit | ||||||||
| 1021 | bool operator[](unsigned bitPosition) const { | ||||||||
| 1022 | assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() && "Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\"" , "llvm/include/llvm/ADT/APInt.h", 1022, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 1023 | return (maskBit(bitPosition) & getWord(bitPosition)) != 0; | ||||||||
| 1024 | } | ||||||||
| 1025 | |||||||||
| 1026 | /// @} | ||||||||
| 1027 | /// \name Comparison Operators | ||||||||
| 1028 | /// @{ | ||||||||
| 1029 | |||||||||
| 1030 | /// Equality operator. | ||||||||
| 1031 | /// | ||||||||
| 1032 | /// Compares this APInt with RHS for the validity of the equality | ||||||||
| 1033 | /// relationship. Both operands must have the same bit width (asserted); multi-word values are compared by equalSlowCase(). | ||||||||
| 1034 | bool operator==(const APInt &RHS) const { | ||||||||
| 1035 | assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth && "Comparison requires equal bit widths") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\"" , "llvm/include/llvm/ADT/APInt.h", 1035, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 1036 | if (isSingleWord()) | ||||||||
| 1037 | return U.VAL == RHS.U.VAL; | ||||||||
| 1038 | return equalSlowCase(RHS); | ||||||||
| 1039 | } | ||||||||
| 1040 | |||||||||
| 1041 | /// Equality operator taking a 64-bit unsigned value. | ||||||||
| 1042 | /// | ||||||||
| 1043 | /// Compares this APInt with a uint64_t for the validity of the equality | ||||||||
| 1044 | /// relationship. A multi-word value can only compare equal when it has at most 64 active bits. | ||||||||
| 1045 | /// | ||||||||
| 1046 | /// \returns true if *this == Val | ||||||||
| 1047 | bool operator==(uint64_t Val) const { | ||||||||
| 1048 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val; | ||||||||
| 1049 | } | ||||||||
| 1050 | |||||||||
| 1051 | /// Equality comparison. | ||||||||
| 1052 | /// | ||||||||
| 1053 | /// Compares this APInt with RHS for the validity of the equality | ||||||||
| 1054 | /// relationship. Named equivalent of operator==(const APInt &). | ||||||||
| 1055 | /// | ||||||||
| 1056 | /// \returns true if *this == RHS | ||||||||
| 1057 | bool eq(const APInt &RHS) const { return (*this) == RHS; } | ||||||||
| 1058 | |||||||||
| 1059 | /// Inequality operator. | ||||||||
| 1060 | /// | ||||||||
| 1061 | /// Compares this APInt with RHS for the validity of the inequality | ||||||||
| 1062 | /// relationship. Implemented as the negation of operator==(const APInt &). | ||||||||
| 1063 | /// | ||||||||
| 1064 | /// \returns true if *this != RHS | ||||||||
| 1065 | bool operator!=(const APInt &RHS) const { return !((*this) == RHS); } | ||||||||
| 1066 | |||||||||
| 1067 | /// Inequality operator taking a 64-bit unsigned value. | ||||||||
| 1068 | /// | ||||||||
| 1069 | /// Compares this APInt with a uint64_t for the validity of the inequality | ||||||||
| 1070 | /// relationship. Implemented as the negation of operator==(uint64_t). | ||||||||
| 1071 | /// | ||||||||
| 1072 | /// \returns true if *this != Val | ||||||||
| 1073 | bool operator!=(uint64_t Val) const { return !((*this) == Val); } | ||||||||
| 1074 | |||||||||
| 1075 | /// Inequality comparison | ||||||||
| 1076 | /// | ||||||||
| 1077 | /// Compares this APInt with RHS for the validity of the inequality | ||||||||
| 1078 | /// relationship. Named equivalent of operator!=(const APInt &). | ||||||||
| 1079 | /// | ||||||||
| 1080 | /// \returns true if *this != RHS | ||||||||
| 1081 | bool ne(const APInt &RHS) const { return !((*this) == RHS); } | ||||||||
| 1082 | |||||||||
| 1083 | /// Unsigned less than comparison | ||||||||
| 1084 | /// | ||||||||
| 1085 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
| 1086 | /// the validity of the less-than relationship. Implemented as compare(RHS) < 0. | ||||||||
| 1087 | /// | ||||||||
| 1088 | /// \returns true if *this < RHS when both are considered unsigned. | ||||||||
| 1089 | bool ult(const APInt &RHS) const { return compare(RHS) < 0; } | ||||||||
| 1090 | |||||||||
| 1091 | /// Unsigned less than comparison | ||||||||
| 1092 | /// | ||||||||
| 1093 | /// Regards *this as an unsigned quantity and compares it with RHS for | ||||||||
| 1094 | /// the validity of the less-than relationship. A multi-word value with more than 64 active bits is never less than RHS. | ||||||||
| 1095 | /// | ||||||||
| 1096 | /// \returns true if *this < RHS when considered unsigned. | ||||||||
| 1097 | bool ult(uint64_t RHS) const { | ||||||||
| 1098 | // Only need to check active bits if not a single word. | ||||||||
| 1099 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS; | ||||||||
| 1100 | } | ||||||||
| 1101 | |||||||||
| 1102 | /// Signed less than comparison | ||||||||
| 1103 | /// | ||||||||
| 1104 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
| 1105 | /// validity of the less-than relationship. Implemented as compareSigned(RHS) < 0. | ||||||||
| 1106 | /// | ||||||||
| 1107 | /// \returns true if *this < RHS when both are considered signed. | ||||||||
| 1108 | bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } | ||||||||
| 1109 | |||||||||
| 1110 | /// Signed less than comparison | ||||||||
| 1111 | /// | ||||||||
| 1112 | /// Regards *this as a signed quantity and compares it with RHS for | ||||||||
| 1113 | /// the validity of the less-than relationship. When a multi-word value needs more than 64 significant bits, the result is simply whether *this is negative. | ||||||||
| 1114 | /// | ||||||||
| 1115 | /// \returns true if *this < RHS when considered signed. | ||||||||
| 1116 | bool slt(int64_t RHS) const { | ||||||||
| 1117 | return (!isSingleWord() && getSignificantBits() > 64) | ||||||||
| 1118 | ? isNegative() | ||||||||
| 1119 | : getSExtValue() < RHS; | ||||||||
| 1120 | } | ||||||||
| 1121 | |||||||||
| 1122 | /// Unsigned less or equal comparison | ||||||||
| 1123 | /// | ||||||||
| 1124 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
| 1125 | /// validity of the less-or-equal relationship. Implemented as compare(RHS) <= 0. | ||||||||
| 1126 | /// | ||||||||
| 1127 | /// \returns true if *this <= RHS when both are considered unsigned. | ||||||||
| 1128 | bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } | ||||||||
| 1129 | |||||||||
| 1130 | /// Unsigned less or equal comparison | ||||||||
| 1131 | /// | ||||||||
| 1132 | /// Regards *this as an unsigned quantity and compares it with RHS for | ||||||||
| 1133 | /// the validity of the less-or-equal relationship. Implemented as !ugt(RHS). | ||||||||
| 1134 | /// | ||||||||
| 1135 | /// \returns true if *this <= RHS when considered unsigned. | ||||||||
| 1136 | bool ule(uint64_t RHS) const { return !ugt(RHS); } | ||||||||
| 1137 | |||||||||
| 1138 | /// Signed less or equal comparison | ||||||||
| 1139 | /// | ||||||||
| 1140 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
| 1141 | /// validity of the less-or-equal relationship. Implemented as compareSigned(RHS) <= 0. | ||||||||
| 1142 | /// | ||||||||
| 1143 | /// \returns true if *this <= RHS when both are considered signed. | ||||||||
| 1144 | bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } | ||||||||
| 1145 | |||||||||
| 1146 | /// Signed less or equal comparison | ||||||||
| 1147 | /// | ||||||||
| 1148 | /// Regards *this as a signed quantity and compares it with RHS for the | ||||||||
| 1149 | /// validity of the less-or-equal relationship. Implemented as !sgt(RHS). | ||||||||
| 1150 | /// NOTE(review): RHS is declared uint64_t here while the scalar sgt/slt/sge overloads take int64_t; confirm whether int64_t was intended. | ||||||||
| 1151 | /// \returns true if *this <= RHS when considered signed. | ||||||||
| 1152 | bool sle(uint64_t RHS) const { return !sgt(RHS); } | ||||||||
| 1153 | |||||||||
| 1154 | /// Unsigned greater than comparison | ||||||||
| 1155 | /// | ||||||||
| 1156 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
| 1157 | /// the validity of the greater-than relationship. Implemented as !ule(RHS). | ||||||||
| 1158 | /// | ||||||||
| 1159 | /// \returns true if *this > RHS when both are considered unsigned. | ||||||||
| 1160 | bool ugt(const APInt &RHS) const { return !ule(RHS); } | ||||||||
| 1161 | |||||||||
| 1162 | /// Unsigned greater than comparison | ||||||||
| 1163 | /// | ||||||||
| 1164 | /// Regards *this as an unsigned quantity and compares it with RHS for | ||||||||
| 1165 | /// the validity of the greater-than relationship. A multi-word value with more than 64 active bits is always greater than RHS. | ||||||||
| 1166 | /// | ||||||||
| 1167 | /// \returns true if *this > RHS when considered unsigned. | ||||||||
| 1168 | bool ugt(uint64_t RHS) const { | ||||||||
| 1169 | // Only need to check active bits if not a single word. | ||||||||
| 1170 | return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS; | ||||||||
| 1171 | } | ||||||||
| 1172 | |||||||||
| 1173 | /// Signed greater than comparison | ||||||||
| 1174 | /// | ||||||||
| 1175 | /// Regards both *this and RHS as signed quantities and compares them for the | ||||||||
| 1176 | /// validity of the greater-than relationship. Implemented as !sle(RHS). | ||||||||
| 1177 | /// | ||||||||
| 1178 | /// \returns true if *this > RHS when both are considered signed. | ||||||||
| 1179 | bool sgt(const APInt &RHS) const { return !sle(RHS); } | ||||||||
| 1180 | |||||||||
| 1181 | /// Signed greater than comparison | ||||||||
| 1182 | /// | ||||||||
| 1183 | /// Regards *this as a signed quantity and compares it with RHS for | ||||||||
| 1184 | /// the validity of the greater-than relationship. When a multi-word value needs more than 64 significant bits, the result is simply whether *this is non-negative. | ||||||||
| 1185 | /// | ||||||||
| 1186 | /// \returns true if *this > RHS when considered signed. | ||||||||
| 1187 | bool sgt(int64_t RHS) const { | ||||||||
| 1188 | return (!isSingleWord() && getSignificantBits() > 64) | ||||||||
| 1189 | ? !isNegative() | ||||||||
| 1190 | : getSExtValue() > RHS; | ||||||||
| 1191 | } | ||||||||
| 1192 | |||||||||
| 1193 | /// Unsigned greater or equal comparison | ||||||||
| 1194 | /// | ||||||||
| 1195 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
| 1196 | /// validity of the greater-or-equal relationship. Implemented as !ult(RHS). | ||||||||
| 1197 | /// | ||||||||
| 1198 | /// \returns true if *this >= RHS when both are considered unsigned. | ||||||||
| 1199 | bool uge(const APInt &RHS) const { return !ult(RHS); } | ||||||||
| 1200 | |||||||||
| 1201 | /// Unsigned greater or equal comparison | ||||||||
| 1202 | /// | ||||||||
| 1203 | /// Regards *this as an unsigned quantity and compares it with RHS for | ||||||||
| 1204 | /// the validity of the greater-or-equal relationship. Implemented as !ult(RHS). | ||||||||
| 1205 | /// | ||||||||
| 1206 | /// \returns true if *this >= RHS when considered unsigned. | ||||||||
| 1207 | bool uge(uint64_t RHS) const { return !ult(RHS); } | ||||||||
| 1208 | |||||||||
| 1209 | /// Signed greater or equal comparison | ||||||||
| 1210 | /// | ||||||||
| 1211 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
| 1212 | /// validity of the greater-or-equal relationship. Implemented as !slt(RHS). | ||||||||
| 1213 | /// | ||||||||
| 1214 | /// \returns true if *this >= RHS when both are considered signed. | ||||||||
| 1215 | bool sge(const APInt &RHS) const { return !slt(RHS); } | ||||||||
| 1216 | |||||||||
| 1217 | /// Signed greater or equal comparison | ||||||||
| 1218 | /// | ||||||||
| 1219 | /// Regards *this as a signed quantity and compares it with RHS for | ||||||||
| 1220 | /// the validity of the greater-or-equal relationship. Implemented as !slt(RHS). | ||||||||
| 1221 | /// | ||||||||
| 1222 | /// \returns true if *this >= RHS when considered signed. | ||||||||
| 1223 | bool sge(int64_t RHS) const { return !slt(RHS); } | ||||||||
| 1224 | |||||||||
| 1225 | /// This operation tests if there are any pairs of corresponding bits | ||||||||
| 1226 | /// between this APInt and RHS that are both set. Both operands must have the same bit width (asserted); multi-word values use intersectsSlowCase(). | ||||||||
| 1227 | bool intersects(const APInt &RHS) const { | ||||||||
| 1228 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 1228, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 1229 | if (isSingleWord()) | ||||||||
| 1230 | return (U.VAL & RHS.U.VAL) != 0; | ||||||||
| 1231 | return intersectsSlowCase(RHS); | ||||||||
| 1232 | } | ||||||||
| 1233 | |||||||||
| 1234 | /// This operation checks that all bits set in this APInt are also set in RHS. Both operands must have the same bit width (asserted); multi-word values use isSubsetOfSlowCase(). | ||||||||
| 1235 | bool isSubsetOf(const APInt &RHS) const { | ||||||||
| 1236 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 1236, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
| 1237 | if (isSingleWord()) | ||||||||
| 1238 | return (U.VAL & ~RHS.U.VAL) == 0; | ||||||||
| 1239 | return isSubsetOfSlowCase(RHS); | ||||||||
| 1240 | } | ||||||||
| 1241 | |||||||||
| 1242 | /// @} | ||||||||
| 1243 | /// \name Resizing Operators | ||||||||
| 1244 | /// @{ | ||||||||
| 1245 | |||||||||
| 1246 | /// Truncate to new width. | ||||||||
| 1247 | /// | ||||||||
| 1248 | /// Truncate the APInt to a specified width. It is an error to specify a width | ||||||||
| 1249 | /// that is greater than the current width. | ||||||||
| 1250 | APInt trunc(unsigned width) const; | ||||||||
| 1251 | |||||||||
| 1252 | /// Truncate to new width with unsigned saturation. | ||||||||
| 1253 | /// | ||||||||
| 1254 | /// If the APInt, treated as unsigned integer, can be losslessly truncated to | ||||||||
| 1255 | /// the new bitwidth, then return truncated APInt. Else, return max value. | ||||||||
| 1256 | APInt truncUSat(unsigned width) const; | ||||||||
| 1257 | |||||||||
| 1258 | /// Truncate to new width with signed saturation. | ||||||||
| 1259 | /// | ||||||||
| 1260 | /// If this APInt, treated as signed integer, can be losslessly truncated to | ||||||||
| 1261 | /// the new bitwidth, then return truncated APInt. Else, return either | ||||||||
| 1262 | /// signed min value if the APInt was negative, or signed max value. | ||||||||
| 1263 | APInt truncSSat(unsigned width) const; | ||||||||
| 1264 | |||||||||
| 1265 | /// Sign extend to a new width. | ||||||||
| 1266 | /// | ||||||||
| 1267 | /// This operation sign extends the APInt to a new width. If the high order | ||||||||
| 1268 | /// bit is set, the fill on the left will be done with 1 bits, otherwise zero. | ||||||||
| 1269 | /// It is an error to specify a width that is less than the | ||||||||
| 1270 | /// current width. | ||||||||
| 1271 | APInt sext(unsigned width) const; | ||||||||
| 1272 | |||||||||
| 1273 | /// Zero extend to a new width. | ||||||||
| 1274 | /// | ||||||||
| 1275 | /// This operation zero extends the APInt to a new width. The high order bits | ||||||||
| 1276 | /// are filled with 0 bits. It is an error to specify a width that is less | ||||||||
| 1277 | /// than the current width. | ||||||||
| 1278 | APInt zext(unsigned width) const; | ||||||||
| 1279 | |||||||||
| 1280 | /// Sign extend or truncate to width | ||||||||
| 1281 | /// | ||||||||
| 1282 | /// Make this APInt have the bit width given by \p width. The value is sign | ||||||||
| 1283 | /// extended, truncated, or left alone to make it that width. | ||||||||
| 1284 | APInt sextOrTrunc(unsigned width) const; | ||||||||
| 1285 | |||||||||
| 1286 | /// Zero extend or truncate to width | ||||||||
| 1287 | /// | ||||||||
| 1288 | /// Make this APInt have the bit width given by \p width. The value is zero | ||||||||
| 1289 | /// extended, truncated, or left alone to make it that width. | ||||||||
| 1290 | APInt zextOrTrunc(unsigned width) const; | ||||||||
| 1291 | |||||||||
| 1292 | /// @} | ||||||||
| 1293 | /// \name Bit Manipulation Operators | ||||||||
| 1294 | /// @{ | ||||||||
| 1295 | |||||||||
| 1296 | /// Set every bit to 1. | ||||||||
| 1297 | void setAllBits() { | ||||||||
| 1298 | if (isSingleWord()) | ||||||||
| 1299 | U.VAL = WORDTYPE_MAX; | ||||||||
| 1300 | else | ||||||||
| 1301 | // Set all the bits in all the words. | ||||||||
| 1302 | memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE); | ||||||||
| 1303 | // Clear the unused ones | ||||||||
| 1304 | clearUnusedBits(); | ||||||||
| 1305 | } | ||||||||
| 1306 | |||||||||
| 1307 | /// Set the given bit to 1 whose position is given as "bitPosition". | ||||||||
| 1308 | void setBit(unsigned BitPosition) { | ||||||||
| 1309 | assert(BitPosition < BitWidth && "BitPosition out of range"); | ||||||||
| 1310 | WordType Mask = maskBit(BitPosition); | ||||||||
| 1311 | if (isSingleWord()) | ||||||||
| 1312 | U.VAL |= Mask; | ||||||||
| 1313 | else | ||||||||
| 1314 | U.pVal[whichWord(BitPosition)] |= Mask; | ||||||||
| 1315 | } | ||||||||
| 1316 | |||||||||
| 1317 | /// Set the sign bit to 1. | ||||||||
| 1318 | void setSignBit() { setBit(BitWidth - 1); } | ||||||||
| 1319 | |||||||||
| 1320 | /// Set a given bit to a given value. | ||||||||
| 1321 | void setBitVal(unsigned BitPosition, bool BitValue) { | ||||||||
| 1322 | if (BitValue) | ||||||||
| 1323 | setBit(BitPosition); | ||||||||
| 1324 | else | ||||||||
| 1325 | clearBit(BitPosition); | ||||||||
| 1326 | } | ||||||||
| 1327 | |||||||||
| 1328 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. | ||||||||
| 1329 | /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls | ||||||||
| 1330 | /// setBits when \p loBit < \p hiBit. | ||||||||
| 1331 | /// For \p loBit == \p hiBit wrap case, set every bit to 1. | ||||||||
| 1332 | void setBitsWithWrap(unsigned loBit, unsigned hiBit) { | ||||||||
| 1333 | assert(hiBit <= BitWidth && "hiBit out of range"); | ||||||||
| 1334 | assert(loBit <= BitWidth && "loBit out of range"); | ||||||||
| 1335 | if (loBit < hiBit) { | ||||||||
| 1336 | setBits(loBit, hiBit); | ||||||||
| 1337 | return; | ||||||||
| 1338 | } | ||||||||
| 1339 | setLowBits(hiBit); | ||||||||
| 1340 | setHighBits(BitWidth - loBit); | ||||||||
| 1341 | } | ||||||||
| 1342 | |||||||||
| 1343 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. | ||||||||
| 1344 | /// This function handles case when \p loBit <= \p hiBit. | ||||||||
| 1345 | void setBits(unsigned loBit, unsigned hiBit) { | ||||||||
| 1346 | assert(hiBit <= BitWidth && "hiBit out of range"); | ||||||||
| 1347 | assert(loBit <= BitWidth && "loBit out of range"); | ||||||||
| 1348 | assert(loBit <= hiBit && "loBit greater than hiBit"); | ||||||||
| 1349 | if (loBit == hiBit) | ||||||||
| 1350 | return; | ||||||||
| 1351 | if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) { | ||||||||
| 1352 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); | ||||||||
| 1353 | mask <<= loBit; | ||||||||
| 1354 | if (isSingleWord()) | ||||||||
| 1355 | U.VAL |= mask; | ||||||||
| 1356 | else | ||||||||
| 1357 | U.pVal[0] |= mask; | ||||||||
| 1358 | } else { | ||||||||
| 1359 | setBitsSlowCase(loBit, hiBit); | ||||||||
| 1360 | } | ||||||||
| 1361 | } | ||||||||
| 1362 | |||||||||
| 1363 | /// Set the top bits starting from loBit. | ||||||||
| 1364 | void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); } | ||||||||
| 1365 | |||||||||
| 1366 | /// Set the bottom loBits bits. | ||||||||
| 1367 | void setLowBits(unsigned loBits) { return setBits(0, loBits); } | ||||||||
| 1368 | |||||||||
| 1369 | /// Set the top hiBits bits. | ||||||||
| 1370 | void setHighBits(unsigned hiBits) { | ||||||||
| 1371 | return setBits(BitWidth - hiBits, BitWidth); | ||||||||
| 1372 | } | ||||||||
| 1373 | |||||||||
| 1374 | /// Set every bit to 0. | ||||||||
| 1375 | void clearAllBits() { | ||||||||
| 1376 | if (isSingleWord()) | ||||||||
| 1377 | U.VAL = 0; | ||||||||
| 1378 | else | ||||||||
| 1379 | memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE); | ||||||||
| 1380 | } | ||||||||
| 1381 | |||||||||
| 1382 | /// Set a given bit to 0. | ||||||||
| 1383 | /// | ||||||||
| 1384 | /// Set the given bit to 0 whose position is given as "bitPosition". | ||||||||
| 1385 | void clearBit(unsigned BitPosition) { | ||||||||
| 1386 | assert(BitPosition < BitWidth && "BitPosition out of range"); | ||||||||
| 1387 | WordType Mask = ~maskBit(BitPosition); | ||||||||
| 1388 | if (isSingleWord()) | ||||||||
| 1389 | U.VAL &= Mask; | ||||||||
| 1390 | else | ||||||||
| 1391 | U.pVal[whichWord(BitPosition)] &= Mask; | ||||||||
| 1392 | } | ||||||||
| 1393 | |||||||||
| 1394 | /// Set bottom loBits bits to 0. | ||||||||
| 1395 | void clearLowBits(unsigned loBits) { | ||||||||
| 1396 | assert(loBits <= BitWidth && "More bits than bitwidth"); | ||||||||
| 1397 | APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits); | ||||||||
| 1398 | *this &= Keep; | ||||||||
| 1399 | } | ||||||||
| 1400 | |||||||||
| 1401 | /// Set the sign bit to 0. | ||||||||
| 1402 | void clearSignBit() { clearBit(BitWidth - 1); } | ||||||||
| 1403 | |||||||||
| 1404 | /// Toggle every bit to its opposite value. | ||||||||
| 1405 | void flipAllBits() { | ||||||||
| 1406 | if (isSingleWord()) { | ||||||||
| 1407 | U.VAL ^= WORDTYPE_MAX; | ||||||||
| 1408 | clearUnusedBits(); | ||||||||
| 1409 | } else { | ||||||||
| 1410 | flipAllBitsSlowCase(); | ||||||||
| 1411 | } | ||||||||
| 1412 | } | ||||||||
| 1413 | |||||||||
| 1414 | /// Toggles a given bit to its opposite value. | ||||||||
| 1415 | /// | ||||||||
| 1416 | /// Toggle a given bit to its opposite value whose position is given | ||||||||
| 1417 | /// as "bitPosition". | ||||||||
| 1418 | void flipBit(unsigned bitPosition); | ||||||||
| 1419 | |||||||||
| 1420 | /// Negate this APInt in place. | ||||||||
| 1421 | void negate() { | ||||||||
| 1422 | flipAllBits(); | ||||||||
| 1423 | ++(*this); | ||||||||
| 1424 | } | ||||||||
| 1425 | |||||||||
| 1426 | /// Insert the bits from a smaller APInt starting at bitPosition. | ||||||||
| 1427 | void insertBits(const APInt &SubBits, unsigned bitPosition); | ||||||||
| 1428 | void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits); | ||||||||
| 1429 | |||||||||
| 1430 | /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). | ||||||||
| 1431 | APInt extractBits(unsigned numBits, unsigned bitPosition) const; | ||||||||
| 1432 | uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const; | ||||||||
| 1433 | |||||||||
| 1434 | /// @} | ||||||||
| 1435 | /// \name Value Characterization Functions | ||||||||
| 1436 | /// @{ | ||||||||
| 1437 | |||||||||
| 1438 | /// Return the number of bits in the APInt. | ||||||||
| 1439 | unsigned getBitWidth() const { return BitWidth; } | ||||||||
| 1440 | |||||||||
| 1441 | /// Get the number of words. | ||||||||
| 1442 | /// | ||||||||
| 1443 | /// Here one word's bitwidth equals to that of uint64_t. | ||||||||
| 1444 | /// | ||||||||
| 1445 | /// \returns the number of words to hold the integer value of this APInt. | ||||||||
| 1446 | unsigned getNumWords() const { return getNumWords(BitWidth); } | ||||||||
| 1447 | |||||||||
| 1448 | /// Get the number of words. | ||||||||
| 1449 | /// | ||||||||
| 1450 | /// *NOTE* Here one word's bitwidth equals to that of uint64_t. | ||||||||
| 1451 | /// | ||||||||
| 1452 | /// \returns the number of words to hold the integer value with a given bit | ||||||||
| 1453 | /// width. | ||||||||
| 1454 | static unsigned getNumWords(unsigned BitWidth) { | ||||||||
| 1455 | return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; | ||||||||
| 1456 | } | ||||||||
| 1457 | |||||||||
| 1458 | /// Compute the number of active bits in the value | ||||||||
| 1459 | /// | ||||||||
| 1460 | /// This function returns the number of active bits which is defined as the | ||||||||
| 1461 | /// bit width minus the number of leading zeros. This is used in several | ||||||||
| 1462 | /// computations to see how "wide" the value is. | ||||||||
| 1463 | unsigned getActiveBits() const { return BitWidth - countl_zero(); } | ||||||||
| 1464 | |||||||||
| 1465 | /// Compute the number of active words in the value of this APInt. | ||||||||
| 1466 | /// | ||||||||
| 1467 | /// This is used in conjunction with getActiveData to extract the raw value of | ||||||||
| 1468 | /// the APInt. | ||||||||
| 1469 | unsigned getActiveWords() const { | ||||||||
| 1470 | unsigned numActiveBits = getActiveBits(); | ||||||||
| 1471 | return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1; | ||||||||
| 1472 | } | ||||||||
| 1473 | |||||||||
| 1474 | /// Get the minimum bit size for this signed APInt | ||||||||
| 1475 | /// | ||||||||
| 1476 | /// Computes the minimum bit width for this APInt while considering it to be a | ||||||||
| 1477 | /// signed (and probably negative) value. If the value is not negative, this | ||||||||
| 1478 | /// function returns the same value as getActiveBits()+1. Otherwise, it | ||||||||
| 1479 | /// returns the smallest bit width that will retain the negative value. For | ||||||||
| 1480 | /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so | ||||||||
| 1481 | /// for -1, this function will always return 1. | ||||||||
| 1482 | unsigned getSignificantBits() const { | ||||||||
| 1483 | return BitWidth - getNumSignBits() + 1; | ||||||||
| 1484 | } | ||||||||
| 1485 | |||||||||
| 1486 | /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead. | ||||||||
| 1487 | unsigned getMinSignedBits() const { return getSignificantBits(); } | ||||||||
| 1488 | |||||||||
| 1489 | /// Get zero extended value | ||||||||
| 1490 | /// | ||||||||
| 1491 | /// This method attempts to return the value of this APInt as a zero extended | ||||||||
| 1492 | /// uint64_t. The bitwidth must be <= 64 or the value must fit within a | ||||||||
| 1493 | /// uint64_t. Otherwise an assertion will result. | ||||||||
| 1494 | uint64_t getZExtValue() const { | ||||||||
| 1495 | if (isSingleWord()) | ||||||||
| 1496 | return U.VAL; | ||||||||
| 1497 | assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); | ||||||||
| 1498 | return U.pVal[0]; | ||||||||
| 1499 | } | ||||||||
| 1500 | |||||||||
| 1501 | /// Get zero extended value if possible | ||||||||
| 1502 | /// | ||||||||
| 1503 | /// This method attempts to return the value of this APInt as a zero extended | ||||||||
| 1504 | /// uint64_t. The bitwidth must be <= 64 or the value must fit within a | ||||||||
| 1505 | /// uint64_t. Otherwise no value is returned. | ||||||||
| 1506 | std::optional<uint64_t> tryZExtValue() const { | ||||||||
| 1507 | return (getActiveBits() <= 64) ? std::optional<uint64_t>(getZExtValue()) | ||||||||
| 1508 | : std::nullopt; | ||||||||
| 1509 | }; | ||||||||
| 1510 | |||||||||
| 1511 | /// Get sign extended value | ||||||||
| 1512 | /// | ||||||||
| 1513 | /// This method attempts to return the value of this APInt as a sign extended | ||||||||
| 1514 | /// int64_t. The bit width must be <= 64 or the value must fit within an | ||||||||
| 1515 | /// int64_t. Otherwise an assertion will result. | ||||||||
| 1516 | int64_t getSExtValue() const { | ||||||||
| 1517 | if (isSingleWord()) | ||||||||
| 1518 | return SignExtend64(U.VAL, BitWidth); | ||||||||
| 1519 | assert(getSignificantBits() <= 64 && "Too many bits for int64_t"); | ||||||||
| 1520 | return int64_t(U.pVal[0]); | ||||||||
| 1521 | } | ||||||||
| 1522 | |||||||||
| 1523 | /// Get sign extended value if possible | ||||||||
| 1524 | /// | ||||||||
| 1525 | /// This method attempts to return the value of this APInt as a sign extended | ||||||||
| 1526 | /// int64_t. The bitwidth must be <= 64 or the value must fit within an | ||||||||
| 1527 | /// int64_t. Otherwise no value is returned. | ||||||||
| 1528 | std::optional<int64_t> trySExtValue() const { | ||||||||
| 1529 | return (getSignificantBits() <= 64) ? std::optional<int64_t>(getSExtValue()) | ||||||||
| 1530 | : std::nullopt; | ||||||||
| 1531 | }; | ||||||||
| 1532 | |||||||||
| 1533 | /// Get bits required for string value. | ||||||||
| 1534 | /// | ||||||||
| 1535 | /// This method determines how many bits are required to hold the APInt | ||||||||
| 1536 | /// equivalent of the string given by \p str. | ||||||||
| 1537 | static unsigned getBitsNeeded(StringRef str, uint8_t radix); | ||||||||
| 1538 | |||||||||
| 1539 | /// Get the bits that are sufficient to represent the string value. This may | ||||||||
| 1540 | /// over estimate the amount of bits required, but it does not require | ||||||||
| 1541 | /// parsing the value in the string. | ||||||||
| 1542 | static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix); | ||||||||
| 1543 | |||||||||
| 1544 | /// The APInt version of std::countl_zero. | ||||||||
| 1545 | /// | ||||||||
| 1546 | /// It counts the number of zeros from the most significant bit to the first | ||||||||
| 1547 | /// one bit. | ||||||||
| 1548 | /// | ||||||||
| 1549 | /// \returns BitWidth if the value is zero, otherwise returns the number of | ||||||||
| 1550 | /// zeros from the most significant bit to the first one bit. | ||||||||
| 1551 | unsigned countl_zero() const { | ||||||||
| 1552 | if (isSingleWord()) { | ||||||||
| 1553 | unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth; | ||||||||
| 1554 | return llvm::countl_zero(U.VAL) - unusedBits; | ||||||||
| 1555 | } | ||||||||
| 1556 | return countLeadingZerosSlowCase(); | ||||||||
| 1557 | } | ||||||||
| 1558 | |||||||||
| 1559 | unsigned countLeadingZeros() const { return countl_zero(); } | ||||||||
| 1560 | |||||||||
| 1561 | /// Count the number of leading one bits. | ||||||||
| 1562 | /// | ||||||||
| 1563 | /// This function is an APInt version of std::countl_one. It counts the number | ||||||||
| 1564 | /// of ones from the most significant bit to the first zero bit. | ||||||||
| 1565 | /// | ||||||||
| 1566 | /// \returns 0 if the high order bit is not set, otherwise returns the number | ||||||||
| 1567 | /// of 1 bits from the most significant to the least | ||||||||
| 1568 | unsigned countl_one() const { | ||||||||
| 1569 | if (isSingleWord()) { | ||||||||
| 1570 | if (LLVM_UNLIKELY(BitWidth == 0)) | ||||||||
| 1571 | return 0; | ||||||||
| 1572 | return llvm::countl_one(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); | ||||||||
| 1573 | } | ||||||||
| 1574 | return countLeadingOnesSlowCase(); | ||||||||
| 1575 | } | ||||||||
| 1576 | |||||||||
| 1577 | unsigned countLeadingOnes() const { return countl_one(); } | ||||||||
| 1578 | |||||||||
| 1579 | /// Computes the number of leading bits of this APInt that are equal to its | ||||||||
| 1580 | /// sign bit. | ||||||||
| 1581 | unsigned getNumSignBits() const { | ||||||||
| 1582 | return isNegative() ? countl_one() : countl_zero(); | ||||||||
| 1583 | } | ||||||||
| 1584 | |||||||||
| 1585 | /// Count the number of trailing zero bits. | ||||||||
| 1586 | /// | ||||||||
| 1587 | /// This function is an APInt version of the countr_zero. It counts the number | ||||||||
| 1588 | /// of zeros from the least significant bit to the first set bit. | ||||||||
| 1589 | /// | ||||||||
| 1590 | /// \returns BitWidth if the value is zero, otherwise returns the number of | ||||||||
| 1591 | /// zeros from the least significant bit to the first one bit. | ||||||||
| 1592 | unsigned countr_zero() const { | ||||||||
| 1593 | if (isSingleWord()) { | ||||||||
| 1594 | unsigned TrailingZeros = llvm::countr_zero(U.VAL); | ||||||||
| 1595 | return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros); | ||||||||
| 1596 | } | ||||||||
| 1597 | return countTrailingZerosSlowCase(); | ||||||||
| 1598 | } | ||||||||
| 1599 | |||||||||
| 1600 | unsigned countTrailingZeros() const { return countr_zero(); } | ||||||||
| 1601 | |||||||||
| 1602 | /// Count the number of trailing one bits. | ||||||||
| 1603 | /// | ||||||||
| 1604 | /// This function is an APInt version of std::countr_one. It counts the number | ||||||||
| 1605 | /// of ones from the least significant bit to the first zero bit. | ||||||||
| 1606 | /// | ||||||||
| 1607 | /// \returns BitWidth if the value is all ones, otherwise returns the number | ||||||||
| 1608 | /// of ones from the least significant bit to the first zero bit. | ||||||||
| 1609 | unsigned countr_one() const { | ||||||||
| 1610 | if (isSingleWord()) | ||||||||
| 1611 | return llvm::countr_one(U.VAL); | ||||||||
| 1612 | return countTrailingOnesSlowCase(); | ||||||||
| 1613 | } | ||||||||
| 1614 | |||||||||
| 1615 | unsigned countTrailingOnes() const { return countr_one(); } | ||||||||
| 1616 | |||||||||
| 1617 | /// Count the number of bits set. | ||||||||
| 1618 | /// | ||||||||
| 1619 | /// This function is an APInt version of std::popcount. It counts the number | ||||||||
| 1620 | /// of 1 bits in the APInt value. | ||||||||
| 1621 | /// | ||||||||
| 1622 | /// \returns 0 if the value is zero, otherwise returns the number of set bits. | ||||||||
| 1623 | unsigned popcount() const { | ||||||||
| 1624 | if (isSingleWord()) | ||||||||
| 1625 | return llvm::popcount(U.VAL); | ||||||||
| 1626 | return countPopulationSlowCase(); | ||||||||
| 1627 | } | ||||||||
| 1628 | |||||||||
| 1629 | unsigned countPopulation() const { return popcount(); } | ||||||||
| 1630 | |||||||||
| 1631 | /// @} | ||||||||
| 1632 | /// \name Conversion Functions | ||||||||
| 1633 | /// @{ | ||||||||
| 1634 | void print(raw_ostream &OS, bool isSigned) const; | ||||||||
| 1635 | |||||||||
| 1636 | /// Converts an APInt to a string and append it to Str. Str is commonly a | ||||||||
| 1637 | /// SmallString. | ||||||||
| 1638 | void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, | ||||||||
| 1639 | bool formatAsCLiteral = false) const; | ||||||||
| 1640 | |||||||||
| 1641 | /// Considers the APInt to be unsigned and converts it into a string in the | ||||||||
| 1642 | /// radix given. The radix can be 2, 8, 10, 16, or 36. | ||||||||
| 1643 | void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { | ||||||||
| 1644 | toString(Str, Radix, false, false); | ||||||||
| 1645 | } | ||||||||
| 1646 | |||||||||
| 1647 | /// Considers the APInt to be signed and converts it into a string in the | ||||||||
| 1648 | /// radix given. The radix can be 2, 8, 10, 16, or 36. | ||||||||
| 1649 | void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { | ||||||||
| 1650 | toString(Str, Radix, true, false); | ||||||||
| 1651 | } | ||||||||
| 1652 | |||||||||
| 1653 | /// \returns a byte-swapped representation of this APInt Value. | ||||||||
| 1654 | APInt byteSwap() const; | ||||||||
| 1655 | |||||||||
| 1656 | /// \returns the value with the bit representation reversed of this APInt | ||||||||
| 1657 | /// Value. | ||||||||
| 1658 | APInt reverseBits() const; | ||||||||
| 1659 | |||||||||
| 1660 | /// Converts this APInt to a double value. | ||||||||
| 1661 | double roundToDouble(bool isSigned) const; | ||||||||
| 1662 | |||||||||
| 1663 | /// Converts this unsigned APInt to a double value. | ||||||||
| 1664 | double roundToDouble() const { return roundToDouble(false); } | ||||||||
| 1665 | |||||||||
| 1666 | /// Converts this signed APInt to a double value. | ||||||||
| 1667 | double signedRoundToDouble() const { return roundToDouble(true); } | ||||||||
| 1668 | |||||||||
| 1669 | /// Converts APInt bits to a double | ||||||||
| 1670 | /// | ||||||||
| 1671 | /// The conversion does not do a translation from integer to double, it just | ||||||||
| 1672 | /// re-interprets the bits as a double. Note that it is valid to do this on | ||||||||
| 1673 | /// any bit width. Exactly 64 bits will be translated. | ||||||||
| 1674 | double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); } | ||||||||
| 1675 | |||||||||
| 1676 | /// Converts APInt bits to a float | ||||||||
| 1677 | /// | ||||||||
| 1678 | /// The conversion does not do a translation from integer to float, it just | ||||||||
| 1679 | /// re-interprets the bits as a float. Note that it is valid to do this on | ||||||||
| 1680 | /// any bit width. Exactly 32 bits will be translated. | ||||||||
| 1681 | float bitsToFloat() const { | ||||||||
| 1682 | return llvm::bit_cast<float>(static_cast<uint32_t>(getWord(0))); | ||||||||
| 1683 | } | ||||||||
| 1684 | |||||||||
| 1685 | /// Converts a double to APInt bits. | ||||||||
| 1686 | /// | ||||||||
| 1687 | /// The conversion does not do a translation from double to integer, it just | ||||||||
| 1688 | /// re-interprets the bits of the double. | ||||||||
| 1689 | static APInt doubleToBits(double V) { | ||||||||
| 1690 | return APInt(sizeof(double) * CHAR_BIT, llvm::bit_cast<uint64_t>(V)); | ||||||||
| 1691 | } | ||||||||
| 1692 | |||||||||
| 1693 | /// Converts a float to APInt bits. | ||||||||
| 1694 | /// | ||||||||
| 1695 | /// The conversion does not do a translation from float to integer, it just | ||||||||
| 1696 | /// re-interprets the bits of the float. | ||||||||
| 1697 | static APInt floatToBits(float V) { | ||||||||
| 1698 | return APInt(sizeof(float) * CHAR_BIT, llvm::bit_cast<uint32_t>(V)); | ||||||||
| 1699 | } | ||||||||
| 1700 | |||||||||
| 1701 | /// @} | ||||||||
| 1702 | /// \name Mathematics Operations | ||||||||
| 1703 | /// @{ | ||||||||
| 1704 | |||||||||
| 1705 | /// \returns the floor log base 2 of this APInt. | ||||||||
| 1706 | unsigned logBase2() const { return getActiveBits() - 1; } | ||||||||
| 1707 | |||||||||
| 1708 | /// \returns the ceil log base 2 of this APInt. | ||||||||
| 1709 | unsigned ceilLogBase2() const { | ||||||||
| 1710 | APInt temp(*this); | ||||||||
| 1711 | --temp; | ||||||||
| 1712 | return temp.getActiveBits(); | ||||||||
| 1713 | } | ||||||||
| 1714 | |||||||||
| 1715 | /// \returns the nearest log base 2 of this APInt. Ties round up. | ||||||||
| 1716 | /// | ||||||||
| 1717 | /// NOTE: When we have a BitWidth of 1, we define: | ||||||||
| 1718 | /// | ||||||||
| 1719 | /// log2(0) = UINT32_MAX | ||||||||
| 1720 | /// log2(1) = 0 | ||||||||
| 1721 | /// | ||||||||
| 1722 | /// to get around any mathematical concerns resulting from | ||||||||
| 1723 | /// referencing 2 in a space where 2 does not exist. | ||||||||
| 1724 | unsigned nearestLogBase2() const; | ||||||||
| 1725 | |||||||||
| 1726 | /// \returns the log base 2 of this APInt if it's an exact power of two, -1 | ||||||||
| 1727 | /// otherwise | ||||||||
| 1728 | int32_t exactLogBase2() const { | ||||||||
| 1729 | if (!isPowerOf2()) | ||||||||
| 1730 | return -1; | ||||||||
| 1731 | return logBase2(); | ||||||||
| 1732 | } | ||||||||
| 1733 | |||||||||
| 1734 | /// Compute the square root. | ||||||||
| 1735 | APInt sqrt() const; | ||||||||
| 1736 | |||||||||
| 1737 | /// Get the absolute value. If *this is < 0 then return -(*this), otherwise | ||||||||
| 1738 | /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit | ||||||||
| 1739 | /// wide APInt) is unchanged due to how negation works. | ||||||||
| 1740 | APInt abs() const { | ||||||||
| 1741 | if (isNegative()) | ||||||||
| 1742 | return -(*this); | ||||||||
| 1743 | return *this; | ||||||||
| 1744 | } | ||||||||
| 1745 | |||||||||
| 1746 | /// \returns the multiplicative inverse for a given modulo. | ||||||||
| 1747 | APInt multiplicativeInverse(const APInt &modulo) const; | ||||||||
| 1748 | |||||||||
| 1749 | /// @} | ||||||||
| 1750 | /// \name Building-block Operations for APInt and APFloat | ||||||||
| 1751 | /// @{ | ||||||||
| 1752 | |||||||||
| 1753 | // These building block operations operate on a representation of arbitrary | ||||||||
| 1754 | // precision, two's-complement, bignum integer values. They should be | ||||||||
| 1755 | // sufficient to implement APInt and APFloat bignum requirements. Inputs are | ||||||||
| 1756 | // generally a pointer to the base of an array of integer parts, representing | ||||||||
| 1757 | // an unsigned bignum, and a count of how many parts there are. | ||||||||
| 1758 | |||||||||
| 1759 | /// Sets the least significant part of a bignum to the input value, and zeroes | ||||||||
| 1760 | /// out higher parts. | ||||||||
| 1761 | static void tcSet(WordType *, WordType, unsigned); | ||||||||
| 1762 | |||||||||
| 1763 | /// Assign one bignum to another. | ||||||||
| 1764 | static void tcAssign(WordType *, const WordType *, unsigned); | ||||||||
| 1765 | |||||||||
| 1766 | /// Returns true if a bignum is zero, false otherwise. | ||||||||
| 1767 | static bool tcIsZero(const WordType *, unsigned); | ||||||||
| 1768 | |||||||||
| 1769 | /// Extract the given bit of a bignum; returns 0 or 1. Zero-based. | ||||||||
| 1770 | static int tcExtractBit(const WordType *, unsigned bit); | ||||||||
| 1771 | |||||||||
| 1772 | /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to | ||||||||
| 1773 | /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least | ||||||||
| 1774 | /// significant bit of DST. All high bits above srcBITS in DST are | ||||||||
| 1775 | /// zero-filled. | ||||||||
| 1776 | static void tcExtract(WordType *, unsigned dstCount, const WordType *, | ||||||||
| 1777 | unsigned srcBits, unsigned srcLSB); | ||||||||
| 1778 | |||||||||
| 1779 | /// Set the given bit of a bignum. Zero-based. | ||||||||
| 1780 | static void tcSetBit(WordType *, unsigned bit); | ||||||||
| 1781 | |||||||||
| 1782 | /// Clear the given bit of a bignum. Zero-based. | ||||||||
| 1783 | static void tcClearBit(WordType *, unsigned bit); | ||||||||
| 1784 | |||||||||
| 1785 | /// Returns the bit number of the least or most significant set bit of a | ||||||||
| 1786 | /// number. If the input number has no bits set -1U is returned. | ||||||||
| 1787 | static unsigned tcLSB(const WordType *, unsigned n); | ||||||||
| 1788 | static unsigned tcMSB(const WordType *parts, unsigned n); | ||||||||
| 1789 | |||||||||
| 1790 | /// Negate a bignum in-place. | ||||||||
| 1791 | static void tcNegate(WordType *, unsigned); | ||||||||
| 1792 | |||||||||
| 1793 | /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. | ||||||||
| 1794 | static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned); | ||||||||
| 1795 | /// DST += RHS. Returns the carry flag. | ||||||||
| 1796 | static WordType tcAddPart(WordType *, WordType, unsigned); | ||||||||
| 1797 | |||||||||
| 1798 | /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. | ||||||||
| 1799 | static WordType tcSubtract(WordType *, const WordType *, WordType carry, | ||||||||
| 1800 | unsigned); | ||||||||
| 1801 | /// DST -= RHS. Returns the carry flag. | ||||||||
| 1802 | static WordType tcSubtractPart(WordType *, WordType, unsigned); | ||||||||
| 1803 | |||||||||
| 1804 | /// DST += SRC * MULTIPLIER + PART if add is true | ||||||||
| 1805 | /// DST = SRC * MULTIPLIER + PART if add is false | ||||||||
| 1806 | /// | ||||||||
| 1807 | /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must | ||||||||
| 1808 | /// start at the same point, i.e. DST == SRC. | ||||||||
| 1809 | /// | ||||||||
| 1810 | /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned. | ||||||||
| 1811 | /// Otherwise DST is filled with the least significant DSTPARTS parts of the | ||||||||
| 1812 | /// result, and if all of the omitted higher parts were zero return zero, | ||||||||
| 1813 | /// otherwise overflow occurred and return one. | ||||||||
| 1814 | static int tcMultiplyPart(WordType *dst, const WordType *src, | ||||||||
| 1815 | WordType multiplier, WordType carry, | ||||||||
| 1816 | unsigned srcParts, unsigned dstParts, bool add); | ||||||||
| 1817 | |||||||||
| 1818 | /// DST = LHS * RHS, where DST has the same width as the operands and is | ||||||||
| 1819 | /// filled with the least significant parts of the result. Returns one if | ||||||||
| 1820 | /// overflow occurred, otherwise zero. DST must be disjoint from both | ||||||||
| 1821 | /// operands. | ||||||||
| 1822 | static int tcMultiply(WordType *, const WordType *, const WordType *, | ||||||||
| 1823 | unsigned); | ||||||||
| 1824 | |||||||||
| 1825 | /// DST = LHS * RHS, where DST has width the sum of the widths of the | ||||||||
| 1826 | /// operands. No overflow occurs. DST must be disjoint from both operands. | ||||||||
| 1827 | static void tcFullMultiply(WordType *, const WordType *, const WordType *, | ||||||||
| 1828 | unsigned, unsigned); | ||||||||
| 1829 | |||||||||
| 1830 | /// If RHS is zero, LHS and REMAINDER are left unchanged and one is returned. | ||||||||
| 1831 | /// Otherwise LHS is set to LHS / RHS with the fractional part discarded, | ||||||||
| 1832 | /// REMAINDER is set to the remainder, and zero is returned, i.e. | ||||||||
| 1833 | /// | ||||||||
| 1834 | /// OLD_LHS = RHS * LHS + REMAINDER | ||||||||
| 1835 | /// | ||||||||
| 1836 | /// SCRATCH is a bignum of the same size as the operands and result, for use by | ||||||||
| 1837 | /// the routine; its contents need not be initialized and are destroyed. LHS, | ||||||||
| 1838 | /// REMAINDER and SCRATCH must be distinct. | ||||||||
| 1839 | static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder, | ||||||||
| 1840 | WordType *scratch, unsigned parts); | ||||||||
| 1841 | |||||||||
| 1842 | /// Shift a bignum left by Count bits. Bits shifted in are zero. There are no | ||||||||
| 1843 | /// restrictions on Count. | ||||||||
| 1844 | static void tcShiftLeft(WordType *, unsigned Words, unsigned Count); | ||||||||
| 1845 | |||||||||
| 1846 | /// Shift a bignum right by Count bits. Bits shifted in are zero. There are no | ||||||||
| 1847 | /// restrictions on Count. | ||||||||
| 1848 | static void tcShiftRight(WordType *, unsigned Words, unsigned Count); | ||||||||
| 1849 | |||||||||
| 1850 | /// Unsigned comparison of two bignums. NOTE(review): the parameters are unnamed and the encoding of the int result is not visible here - confirm against the definition. | ||||||||
| 1851 | static int tcCompare(const WordType *, const WordType *, unsigned); | ||||||||
| 1852 | |||||||||
| 1853 | /// Increment a bignum in place by calling tcAddPart(dst, 1, parts). Returns the carry flag. | ||||||||
| 1854 | static WordType tcIncrement(WordType *dst, unsigned parts) { | ||||||||
| 1855 | return tcAddPart(dst, 1, parts); | ||||||||
| 1856 | } | ||||||||
| 1857 | |||||||||
| 1858 | /// Decrement a bignum in place by calling tcSubtractPart(dst, 1, parts). Returns the borrow flag. | ||||||||
| 1859 | static WordType tcDecrement(WordType *dst, unsigned parts) { | ||||||||
| 1860 | return tcSubtractPart(dst, 1, parts); | ||||||||
| 1861 | } | ||||||||
| 1862 | |||||||||
| 1863 | /// Supports inserting APInt objects, or objects that contain APInt objects, into | ||||||||
| 1864 | /// FoldingSets. Takes the FoldingSetNodeID by reference; const, so this object is not modified. | ||||||||
| 1865 | void Profile(FoldingSetNodeID &id) const; | ||||||||
| 1866 | |||||||||
| 1867 | /// Debug method. Only declared here; const, so it does not modify this object. | ||||||||
| 1868 | void dump() const; | ||||||||
| 1869 | |||||||||
| 1870 | /// Returns whether this instance allocated memory, i.e. whether the value is not stored in a single word (!isSingleWord()). | ||||||||
| 1871 | bool needsCleanup() const { return !isSingleWord(); } | ||||||||
| 1872 | |||||||||
| 1873 | private: | ||||||||
| 1874 | /// Storage for the integer value. When the integer bit-width is <= 64 the | ||||||||
| 1875 | /// value is kept in VAL; otherwise pVal is used. | ||||||||
| 1876 | union { | ||||||||
| 1877 | uint64_t VAL; ///< Holds the value when the bit-width is <= 64. | ||||||||
| 1878 | uint64_t *pVal; ///< Used to store the value when the bit-width is > 64. | ||||||||
| 1879 | } U; | ||||||||
| 1880 | |||||||||
| 1881 | unsigned BitWidth = 1; ///< The number of bits in this APInt (default member initializer is 1). | ||||||||
| 1882 | |||||||||
| 1883 | friend struct DenseMapInfo<APInt, void>; | ||||||||
| 1884 | friend class APSInt; | ||||||||
| 1885 | |||||||||
| 1886 | /// Internal constructor for fast construction of temporaries: stores \p val | ||||||||
| 1887 | /// directly in U.pVal and \p bits in BitWidth. It is unsafe since it takes | ||||||||
| 1888 | /// ownership of the pointer, so it is not public. | ||||||||
| 1889 | APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; } | ||||||||
| 1890 | |||||||||
| 1891 | /// Determine which word a bit is in. | ||||||||
| 1892 | /// | ||||||||
| 1893 | /// \returns the index of the word holding \p bitPosition, i.e. bitPosition / APINT_BITS_PER_WORD. | ||||||||
| 1894 | static unsigned whichWord(unsigned bitPosition) { | ||||||||
| 1895 | return bitPosition / APINT_BITS_PER_WORD; | ||||||||
| 1896 | } | ||||||||
| 1897 | |||||||||
| 1898 | /// Determine which bit within its word the specified bit position is, i.e. bitPosition % APINT_BITS_PER_WORD. | ||||||||
| 1899 | static unsigned whichBit(unsigned bitPosition) { | ||||||||
| 1900 | return bitPosition % APINT_BITS_PER_WORD; | ||||||||
| 1901 | } | ||||||||
| 1902 | |||||||||
| 1903 | /// Get a single bit mask. | ||||||||
| 1904 | /// | ||||||||
| 1905 | /// \returns a uint64_t with only the bit at "whichBit(bitPosition)" set. | ||||||||
| 1906 | /// This method generates and returns a uint64_t (word) mask for a single | ||||||||
| 1907 | /// bit at a specific bit position. The mask is relative to the word that | ||||||||
| 1908 | /// contains the bit, so it is used to mask the bit in the corresponding word. | ||||||||
| 1909 | static uint64_t maskBit(unsigned bitPosition) { | ||||||||
| 1910 | return 1ULL << whichBit(bitPosition); | ||||||||
| 1911 | } | ||||||||
| 1912 | |||||||||
| 1913 | /// Clear unused high order bits | ||||||||
| 1914 | /// | ||||||||
| 1915 | /// This method is used internally to clear the top "N" bits in the high order | ||||||||
| 1916 | /// word that are not used by the APInt. This is needed after the most | ||||||||
| 1917 | /// significant word is assigned a value to ensure that those bits are | ||||||||
| 1918 | /// zero'd out. | ||||||||
| | /// | ||||||||
| | /// \returns a reference to this object. | ||||||||
| 1919 | APInt &clearUnusedBits() { | ||||||||
| 1920 | // Compute how many bits are used in the final word. | ||||||||
| 1921 | unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1; | ||||||||
| 1922 | |||||||||
| 1923 | // Mask out the high bits. | ||||||||
| 1924 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); | ||||||||
| | // BitWidth is unsigned, so for BitWidth == 0 the "BitWidth - 1" above wraps | ||||||||
| | // around and the computed mask is not meaningful; a zero-width value keeps no bits. | ||||||||
| 1925 | if (LLVM_UNLIKELY(BitWidth == 0)) | ||||||||
| 1926 | mask = 0; | ||||||||
| 1927 | |||||||||
| 1928 | if (isSingleWord()) | ||||||||
| 1929 | U.VAL &= mask; | ||||||||
| 1930 | else | ||||||||
| 1931 | U.pVal[getNumWords() - 1] &= mask; | ||||||||
| 1932 | return *this; | ||||||||
| 1933 | } | ||||||||
| 1934 | |||||||||
| 1935 | /// Get the word corresponding to a bit position. | ||||||||
| 1936 | /// \returns U.VAL for a single-word value (\p bitPosition is then not consulted), otherwise U.pVal[whichWord(bitPosition)]. | ||||||||
| 1937 | uint64_t getWord(unsigned bitPosition) const { | ||||||||
| 1938 | return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; | ||||||||
| 1939 | } | ||||||||
| 1940 | |||||||||
| 1941 | /// Utility method to change the bit width of this APInt to \p NewBitWidth, | ||||||||
| 1942 | /// allocating and/or deallocating as necessary. There is no guarantee on the | ||||||||
| 1943 | /// value of any bits upon return; the caller should populate the bits afterwards. | ||||||||
| 1944 | void reallocate(unsigned NewBitWidth); | ||||||||
| 1945 | |||||||||
| 1946 | /// Convert a char array into an APInt. | ||||||||
| 1947 | /// | ||||||||
| 1948 | /// \param radix 2, 8, 10, 16, or 36 | ||||||||
| 1949 | /// Converts a string into a number. The string must be non-empty | ||||||||
| 1950 | /// and well-formed as a number of the given base. The bit-width | ||||||||
| 1951 | /// must be sufficient to hold the result. | ||||||||
| 1952 | /// | ||||||||
| 1953 | /// This is used by the constructors that take string arguments. | ||||||||
| 1954 | /// | ||||||||
| 1955 | /// StringRef::getAsInteger is superficially similar, but it (1) does | ||||||||
| 1956 | /// not assume that the string is well-formed and (2) grows the | ||||||||
| 1957 | /// result to hold the input. | ||||||||
| 1958 | void fromString(unsigned numBits, StringRef str, uint8_t radix); | ||||||||
| 1959 | |||||||||
| 1960 | /// An internal division function for dividing APInts. | ||||||||
| 1961 | /// | ||||||||
| 1962 | /// This is used by the toString method to divide by the radix. It simply | ||||||||
| 1963 | /// provides a more convenient form of divide for internal use, since KnuthDiv | ||||||||
| 1964 | /// has specific constraints on its inputs. If those constraints are not met, | ||||||||
| 1965 | /// it provides a simpler form of divide. | ||||||||
| 1966 | static void divide(const WordType *LHS, unsigned lhsWords, | ||||||||
| 1967 | const WordType *RHS, unsigned rhsWords, WordType *Quotient, | ||||||||
| 1968 | WordType *Remainder); | ||||||||
| 1969 | |||||||||
| 1970 | /// Out-of-line slow case for the inline constructor. | ||||||||
| 1971 | void initSlowCase(uint64_t val, bool isSigned); | ||||||||
| 1972 | |||||||||
| 1973 | /// Shared code between the two array constructors. | ||||||||
| 1974 | void initFromArray(ArrayRef<uint64_t> array); | ||||||||
| 1975 | |||||||||
| 1976 | /// Out-of-line slow case for the inline copy constructor. | ||||||||
| 1977 | void initSlowCase(const APInt &that); | ||||||||
| 1978 | |||||||||
| 1979 | /// Out-of-line slow case for shl. | ||||||||
| 1980 | void shlSlowCase(unsigned ShiftAmt); | ||||||||
| 1981 | |||||||||
| 1982 | /// Out-of-line slow case for lshr. | ||||||||
| 1983 | void lshrSlowCase(unsigned ShiftAmt); | ||||||||
| 1984 | |||||||||
| 1985 | /// Out-of-line slow case for ashr. | ||||||||
| 1986 | void ashrSlowCase(unsigned ShiftAmt); | ||||||||
| 1987 | |||||||||
| 1988 | /// Out-of-line slow case for operator=. | ||||||||
| 1989 | void assignSlowCase(const APInt &RHS); | ||||||||
| 1990 | |||||||||
| 1991 | /// Out-of-line slow case for operator==. | ||||||||
| 1992 | bool equalSlowCase(const APInt &RHS) const LLVM_READONLY; | ||||||||
| 1993 | |||||||||
| 1994 | /// Out-of-line slow case for countLeadingZeros. | ||||||||
| 1995 | unsigned countLeadingZerosSlowCase() const LLVM_READONLY; | ||||||||
| 1996 | |||||||||
| 1997 | /// Out-of-line slow case for countLeadingOnes. | ||||||||
| 1998 | unsigned countLeadingOnesSlowCase() const LLVM_READONLY; | ||||||||
| 1999 | |||||||||
| 2000 | /// Out-of-line slow case for countTrailingZeros. | ||||||||
| 2001 | unsigned countTrailingZerosSlowCase() const LLVM_READONLY; | ||||||||
| 2002 | |||||||||
| 2003 | /// Out-of-line slow case for countTrailingOnes. | ||||||||
| 2004 | unsigned countTrailingOnesSlowCase() const LLVM_READONLY; | ||||||||
| 2005 | |||||||||
| 2006 | /// Out-of-line slow case for countPopulation. | ||||||||
| 2007 | unsigned countPopulationSlowCase() const LLVM_READONLY; | ||||||||
| 2008 | |||||||||
| 2009 | /// Out-of-line slow case for intersects. | ||||||||
| 2010 | bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; | ||||||||
| 2011 | |||||||||
| 2012 | /// Out-of-line slow case for isSubsetOf. | ||||||||
| 2013 | bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; | ||||||||
| 2014 | |||||||||
| 2015 | /// Out-of-line slow case for setBits. | ||||||||
| 2016 | void setBitsSlowCase(unsigned loBit, unsigned hiBit); | ||||||||
| 2017 | |||||||||
| 2018 | /// Out-of-line slow case for flipAllBits. | ||||||||
| 2019 | void flipAllBitsSlowCase(); | ||||||||
| 2020 | |||||||||
| 2021 | /// Out-of-line slow case for concat. | ||||||||
| 2022 | APInt concatSlowCase(const APInt &NewLSB) const; | ||||||||
| 2023 | |||||||||
| 2024 | /// Out-of-line slow case for operator&=. | ||||||||
| 2025 | void andAssignSlowCase(const APInt &RHS); | ||||||||
| 2026 | |||||||||
| 2027 | /// Out-of-line slow case for operator|=. | ||||||||
| 2028 | void orAssignSlowCase(const APInt &RHS); | ||||||||
| 2029 | |||||||||
| 2030 | /// Out-of-line slow case for operator^=. | ||||||||
| 2031 | void xorAssignSlowCase(const APInt &RHS); | ||||||||
| 2032 | |||||||||
| 2033 | /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal | ||||||||
| 2034 | /// to, or greater than RHS, respectively. | ||||||||
| 2035 | int compare(const APInt &RHS) const LLVM_READONLY; | ||||||||
| 2036 | |||||||||
| 2037 | /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal | ||||||||
| 2038 | /// to, or greater than RHS, respectively. | ||||||||
| 2039 | int compareSigned(const APInt &RHS) const LLVM_READONLY; | ||||||||
| 2040 | |||||||||
| 2041 | /// @} | ||||||||
| 2042 | }; | ||||||||
| 2043 | |||||||||
| | /// Equality with the uint64_t on the left-hand side; forwards to V2 == V1. | ||||||||
| 2044 | inline bool operator==(uint64_t V1, const APInt &V2) { | ||||||||
| | return V2 == V1; | ||||||||
| | } | ||||||||
| 2045 | |||||||||
| | /// Inequality with the uint64_t on the left-hand side; forwards to V2 != V1. | ||||||||
| 2046 | inline bool operator!=(uint64_t V1, const APInt &V2) { | ||||||||
| | return V2 != V1; | ||||||||
| | } | ||||||||
| 2047 | |||||||||
| 2048 | /// Unary bitwise complement operator. | ||||||||
| 2049 | /// | ||||||||
| 2050 | /// \returns an APInt that is the bitwise complement of \p v. \p v is taken by value, so the copy is flipped in place (flipAllBits) and returned. | ||||||||
| 2051 | inline APInt operator~(APInt v) { | ||||||||
| 2052 | v.flipAllBits(); | ||||||||
| 2053 | return v; | ||||||||
| 2054 | } | ||||||||
| 2055 | |||||||||
| | /// Bitwise AND. \p a is passed by value, so the copy is updated in place and returned. | ||||||||
| 2056 | inline APInt operator&(APInt a, const APInt &b) { | ||||||||
| 2057 | a &= b; | ||||||||
| 2058 | return a; | ||||||||
| 2059 | } | ||||||||
| 2060 | |||||||||
| | /// Bitwise AND with an rvalue right-hand side: \p b is updated in place and moved out. | ||||||||
| 2061 | inline APInt operator&(const APInt &a, APInt &&b) { | ||||||||
| 2062 | b &= a; | ||||||||
| 2063 | return std::move(b); | ||||||||
| 2064 | } | ||||||||
| 2065 | |||||||||
| | /// Bitwise AND of an APInt (by-value copy, updated in place) with a uint64_t. | ||||||||
| 2066 | inline APInt operator&(APInt a, uint64_t RHS) { | ||||||||
| 2067 | a &= RHS; | ||||||||
| 2068 | return a; | ||||||||
| 2069 | } | ||||||||
| 2070 | |||||||||
| | /// Bitwise AND of a uint64_t with an APInt (by-value copy, updated in place). | ||||||||
| 2071 | inline APInt operator&(uint64_t LHS, APInt b) { | ||||||||
| 2072 | b &= LHS; | ||||||||
| 2073 | return b; | ||||||||
| 2074 | } | ||||||||
| 2075 | |||||||||
| | /// Bitwise OR. \p a is passed by value, so the copy is updated in place and returned. | ||||||||
| 2076 | inline APInt operator|(APInt a, const APInt &b) { | ||||||||
| 2077 | a |= b; | ||||||||
| 2078 | return a; | ||||||||
| 2079 | } | ||||||||
| 2080 | |||||||||
| | /// Bitwise OR with an rvalue right-hand side: \p b is updated in place and moved out. | ||||||||
| 2081 | inline APInt operator|(const APInt &a, APInt &&b) { | ||||||||
| 2082 | b |= a; | ||||||||
| 2083 | return std::move(b); | ||||||||
| 2084 | } | ||||||||
| 2085 | |||||||||
| | /// Bitwise OR of an APInt (by-value copy, updated in place) with a uint64_t. | ||||||||
| 2086 | inline APInt operator|(APInt a, uint64_t RHS) { | ||||||||
| 2087 | a |= RHS; | ||||||||
| 2088 | return a; | ||||||||
| 2089 | } | ||||||||
| 2090 | |||||||||
| | /// Bitwise OR of a uint64_t with an APInt (by-value copy, updated in place). | ||||||||
| 2091 | inline APInt operator|(uint64_t LHS, APInt b) { | ||||||||
| 2092 | b |= LHS; | ||||||||
| 2093 | return b; | ||||||||
| 2094 | } | ||||||||
| 2095 | |||||||||
| | /// Bitwise XOR. \p a is passed by value, so the copy is updated in place and returned. | ||||||||
| 2096 | inline APInt operator^(APInt a, const APInt &b) { | ||||||||
| 2097 | a ^= b; | ||||||||
| 2098 | return a; | ||||||||
| 2099 | } | ||||||||
| 2100 | |||||||||
| | /// Bitwise XOR with an rvalue right-hand side: \p b is updated in place and moved out. | ||||||||
| 2101 | inline APInt operator^(const APInt &a, APInt &&b) { | ||||||||
| 2102 | b ^= a; | ||||||||
| 2103 | return std::move(b); | ||||||||
| 2104 | } | ||||||||
| 2105 | |||||||||
| | /// Bitwise XOR of an APInt (by-value copy, updated in place) with a uint64_t. | ||||||||
| 2106 | inline APInt operator^(APInt a, uint64_t RHS) { | ||||||||
| 2107 | a ^= RHS; | ||||||||
| 2108 | return a; | ||||||||
| 2109 | } | ||||||||
| 2110 | |||||||||
| | /// Bitwise XOR of a uint64_t with an APInt (by-value copy, updated in place). | ||||||||
| 2111 | inline APInt operator^(uint64_t LHS, APInt b) { | ||||||||
| 2112 | b ^= LHS; | ||||||||
| 2113 | return b; | ||||||||
| 2114 | } | ||||||||
| 2115 | |||||||||
| | /// Stream insertion: writes \p I to \p OS via I.print(OS, true) and returns \p OS for chaining. | ||||||||
| | /// NOTE(review): the second print() argument presumably selects signed formatting - confirm against APInt::print. | ||||||||
| 2116 | inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) { | ||||||||
| 2117 | I.print(OS, true); | ||||||||
| 2118 | return OS; | ||||||||
| 2119 | } | ||||||||
| 2120 | |||||||||
| | /// Unary minus. \p v is passed by value; the copy is negated in place (negate) and returned. | ||||||||
| 2121 | inline APInt operator-(APInt v) { | ||||||||
| 2122 | v.negate(); | ||||||||
| 2123 | return v; | ||||||||
| 2124 | } | ||||||||
| 2125 | |||||||||
| | /// Addition. \p a is passed by value, so the copy is updated in place and returned. | ||||||||
| 2126 | inline APInt operator+(APInt a, const APInt &b) { | ||||||||
| 2127 | a += b; | ||||||||
| 2128 | return a; | ||||||||
| 2129 | } | ||||||||
| 2130 | |||||||||
| | /// Addition with an rvalue right-hand side: \p b is updated in place and moved out. | ||||||||
| 2131 | inline APInt operator+(const APInt &a, APInt &&b) { | ||||||||
| 2132 | b += a; | ||||||||
| 2133 | return std::move(b); | ||||||||
| 2134 | } | ||||||||
| 2135 | |||||||||
| | /// Addition of an APInt (by-value copy, updated in place) and a uint64_t. | ||||||||
| 2136 | inline APInt operator+(APInt a, uint64_t RHS) { | ||||||||
| 2137 | a += RHS; | ||||||||
| 2138 | return a; | ||||||||
| 2139 | } | ||||||||
| 2140 | |||||||||
| | /// Addition of a uint64_t and an APInt (by-value copy, updated in place). | ||||||||
| 2141 | inline APInt operator+(uint64_t LHS, APInt b) { | ||||||||
| 2142 | b += LHS; | ||||||||
| 2143 | return b; | ||||||||
| 2144 | } | ||||||||
| 2145 | |||||||||
| | /// Subtraction. \p a is passed by value, so the copy is updated in place and returned. | ||||||||
| 2146 | inline APInt operator-(APInt a, const APInt &b) { | ||||||||
| 2147 | a -= b; | ||||||||
| 2148 | return a; | ||||||||
| 2149 | } | ||||||||
| 2150 | |||||||||
| | /// Subtraction with an rvalue right-hand side: a - b is formed as (-b) + a so that | ||||||||
| | /// \p b can be updated in place and moved out. | ||||||||
| 2151 | inline APInt operator-(const APInt &a, APInt &&b) { | ||||||||
| 2152 | b.negate(); | ||||||||
| 2153 | b += a; | ||||||||
| 2154 | return std::move(b); | ||||||||
| 2155 | } | ||||||||
| 2156 | |||||||||
| | /// Subtraction of a uint64_t from an APInt (by-value copy, updated in place). | ||||||||
| 2157 | inline APInt operator-(APInt a, uint64_t RHS) { | ||||||||
| 2158 | a -= RHS; | ||||||||
| 2159 | return a; | ||||||||
| 2160 | } | ||||||||
| 2161 | |||||||||
| | /// Subtraction of an APInt from a uint64_t: LHS - b is formed as (-b) + LHS on the | ||||||||
| | /// by-value copy of \p b. | ||||||||
| 2162 | inline APInt operator-(uint64_t LHS, APInt b) { | ||||||||
| 2163 | b.negate(); | ||||||||
| 2164 | b += LHS; | ||||||||
| 2165 | return b; | ||||||||
| 2166 | } | ||||||||
| 2167 | |||||||||
| | /// Multiplication of an APInt (by-value copy, updated in place) by a uint64_t. | ||||||||
| 2168 | inline APInt operator*(APInt a, uint64_t RHS) { | ||||||||
| 2169 | a *= RHS; | ||||||||
| 2170 | return a; | ||||||||
| 2171 | } | ||||||||
| 2172 | |||||||||
| | /// Multiplication of a uint64_t by an APInt (by-value copy, updated in place). | ||||||||
| 2173 | inline APInt operator*(uint64_t LHS, APInt b) { | ||||||||
| 2174 | b *= LHS; | ||||||||
| 2175 | return b; | ||||||||
| 2176 | } | ||||||||
| 2177 | |||||||||
| 2178 | namespace APIntOps { | ||||||||
| 2179 | |||||||||
| 2180 | /// Signed minimum: returns \p A when A.slt(B) holds, otherwise \p B. | ||||||||
| 2181 | inline const APInt &smin(const APInt &A, const APInt &B) { | ||||||||
| 2182 | if (A.slt(B)) | ||||||||
| | return A; | ||||||||
| | return B; | ||||||||
| 2183 | } | ||||||||
| 2184 | |||||||||
| 2185 | /// Signed maximum: returns \p A when A.sgt(B) holds, otherwise \p B. | ||||||||
| 2186 | inline const APInt &smax(const APInt &A, const APInt &B) { | ||||||||
| 2187 | if (A.sgt(B)) | ||||||||
| | return A; | ||||||||
| | return B; | ||||||||
| 2188 | } | ||||||||
| 2189 | |||||||||
| 2190 | /// Unsigned minimum: returns \p A when A.ult(B) holds, otherwise \p B. | ||||||||
| 2191 | inline const APInt &umin(const APInt &A, const APInt &B) { | ||||||||
| 2192 | if (A.ult(B)) | ||||||||
| | return A; | ||||||||
| | return B; | ||||||||
| 2193 | } | ||||||||
| 2194 | |||||||||
| 2195 | /// Unsigned maximum: returns \p A when A.ugt(B) holds, otherwise \p B. | ||||||||
| 2196 | inline const APInt &umax(const APInt &A, const APInt &B) { | ||||||||
| 2197 | if (A.ugt(B)) | ||||||||
| | return A; | ||||||||
| | return B; | ||||||||
| 2198 | } | ||||||||
| 2199 | |||||||||
| 2200 | /// Compute the GCD of two unsigned APInt values. | ||||||||
| 2201 | /// | ||||||||
| 2202 | /// This function returns the greatest common divisor of the two APInt values | ||||||||
| 2203 | /// using Stein's algorithm. | ||||||||
| 2204 | /// | ||||||||
| 2205 | /// \returns the greatest common divisor of \p A and \p B. | ||||||||
| 2206 | APInt GreatestCommonDivisor(APInt A, APInt B); | ||||||||
| 2207 | |||||||||
| 2208 | /// Converts the given APInt to a double value. | ||||||||
| 2209 | /// | ||||||||
| 2210 | /// Treats the APInt as an unsigned value for conversion purposes. | ||||||||
| 2211 | inline double RoundAPIntToDouble(const APInt &APIVal) { | ||||||||
| 2212 | return APIVal.roundToDouble(); | ||||||||
| 2213 | } | ||||||||
| 2214 | |||||||||
| 2215 | /// Converts the given APInt to a double value. | ||||||||
| 2216 | /// | ||||||||
| 2217 | /// Treats the APInt as a signed value for conversion purposes. | ||||||||
| 2218 | inline double RoundSignedAPIntToDouble(const APInt &APIVal) { | ||||||||
| 2219 | return APIVal.signedRoundToDouble(); | ||||||||
| 2220 | } | ||||||||
| 2221 | |||||||||
| 2222 | /// Converts the given APInt to a float value by narrowing its roundToDouble() result. | ||||||||
| 2223 | inline float RoundAPIntToFloat(const APInt &APIVal) { | ||||||||
| 2224 | return static_cast<float>(APIVal.roundToDouble()); | ||||||||
| 2225 | } | ||||||||
| 2226 | |||||||||
| 2227 | /// Converts the given APInt to a float value by narrowing its signedRoundToDouble() result. | ||||||||
| 2228 | /// | ||||||||
| 2229 | /// Treats the APInt as a signed value for conversion purposes. | ||||||||
| 2230 | inline float RoundSignedAPIntToFloat(const APInt &APIVal) { | ||||||||
| 2231 | return static_cast<float>(APIVal.signedRoundToDouble()); | ||||||||
| 2232 | } | ||||||||
| 2233 | |||||||||
| 2234 | /// Converts the given double value into an APInt. | ||||||||
| 2235 | /// | ||||||||
| 2236 | /// This function converts a double value to an APInt value. | ||||||||
| 2237 | APInt RoundDoubleToAPInt(double Double, unsigned width); | ||||||||
| 2238 | |||||||||
| 2239 | /// Converts a float value into an APInt. | ||||||||
| 2240 | /// | ||||||||
| 2241 | /// Widens \p Float to double and forwards to RoundDoubleToAPInt. | ||||||||
| 2242 | inline APInt RoundFloatToAPInt(float Float, unsigned width) { | ||||||||
| 2243 | return RoundDoubleToAPInt(static_cast<double>(Float), width); | ||||||||
| 2244 | } | ||||||||
| 2245 | |||||||||
| 2246 | /// Return A unsigned-divided by B, rounded by the given rounding mode. | ||||||||
| 2247 | APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM); | ||||||||
| 2248 | |||||||||
| 2249 | /// Return A signed-divided by B, rounded by the given rounding mode. | ||||||||
| 2250 | APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM); | ||||||||
| 2251 | |||||||||
| 2252 | /// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range | ||||||||
| 2253 | /// (e.g. 32 for i32). | ||||||||
| 2254 | /// This function finds the smallest number n, such that | ||||||||
| 2255 | /// (a) n >= 0 and q(n) = 0, or | ||||||||
| 2256 | /// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all | ||||||||
| 2257 | /// integers, belong to two different intervals [Rk, Rk+R), | ||||||||
| 2258 | /// where R = 2^BW, and k is an integer. | ||||||||
| 2259 | /// The idea here is to find when q(n) "overflows" 2^BW, while at the | ||||||||
| 2260 | /// same time "allowing" subtraction. In unsigned modulo arithmetic a | ||||||||
| 2261 | /// subtraction (treated as addition of negated numbers) would always | ||||||||
| 2262 | /// count as an overflow, but here we want to allow values to decrease | ||||||||
| 2263 | /// and increase as long as they are within the same interval. | ||||||||
| 2264 | /// Specifically, adding two negative numbers should not cause an | ||||||||
| 2265 | /// overflow (as long as the magnitude does not exceed the bit width). | ||||||||
| 2266 | /// On the other hand, given a positive number, adding a negative | ||||||||
| 2267 | /// number to it can give a negative result, which would cause the | ||||||||
| 2268 | /// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is | ||||||||
| 2269 | /// treated as a special case of an overflow. | ||||||||
| 2270 | /// | ||||||||
| 2271 | /// This function returns std::nullopt if after finding k that minimizes the | ||||||||
| 2272 | /// positive solution to q(n) = kR, both solutions are contained between | ||||||||
| 2273 | /// two consecutive integers. | ||||||||
| 2274 | /// | ||||||||
| 2275 | /// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation | ||||||||
| 2276 | /// in arithmetic modulo 2^BW, and treating the values as signed) by the | ||||||||
| 2277 | /// virtue of *signed* overflow. This function will *not* find such an n, | ||||||||
| 2278 | /// however it may find a value of n satisfying the inequalities due to | ||||||||
| 2279 | /// an *unsigned* overflow (if the values are treated as unsigned). | ||||||||
| 2280 | /// To find a solution for a signed overflow, treat it as a problem of | ||||||||
| 2281 | /// finding an unsigned overflow with a range width of BW-1. | ||||||||
| 2282 | /// | ||||||||
| 2283 | /// The returned value may have a different bit width from the input | ||||||||
| 2284 | /// coefficients. | ||||||||
| 2285 | std::optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, | ||||||||
| 2286 | unsigned RangeWidth); | ||||||||
| 2287 | |||||||||
| 2288 | /// Compare two values, and if they are different, return the position of the | ||||||||
| 2289 | /// most significant bit that is different in the values. | ||||||||
| 2290 | std::optional<unsigned> GetMostSignificantDifferentBit(const APInt &A, | ||||||||
| 2291 | const APInt &B); | ||||||||
| 2292 | |||||||||
| 2293 | /// Splat/Merge neighboring bits to widen/narrow the bitmask represented | ||||||||
| 2294 | /// by \p A to \p NewBitWidth bits. | ||||||||
| 2295 | /// | ||||||||
| 2296 | /// MatchAnyBits: (Default) | ||||||||
| 2297 | /// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 | ||||||||
| 2298 | /// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111 | ||||||||
| 2299 | /// | ||||||||
| 2300 | /// MatchAllBits: | ||||||||
| 2301 | /// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 | ||||||||
| 2302 | /// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001 | ||||||||
| 2303 | /// A.getBitWidth() or NewBitWidth must be a whole multiple of the other. | ||||||||
| 2304 | APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, | ||||||||
| 2305 | bool MatchAllBits = false); | ||||||||
| 2306 | } // namespace APIntOps | ||||||||
| 2307 | |||||||||
| 2308 | // See the friend declaration above. This additional declaration is required in | ||||||||
| 2309 | // order to compile LLVM with the IBM xlC compiler. | ||||||||
| 2310 | hash_code hash_value(const APInt &Arg); | ||||||||
| 2311 | |||||||||
| 2312 | /// StoreIntToMemory - Fills the \p StoreBytes bytes of memory starting at \p Dst | ||||||||
| 2313 | /// with the integer held in \p IntVal. | ||||||||
| 2314 | void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); | ||||||||
| 2315 | |||||||||
| 2316 | /// LoadIntFromMemory - Loads the integer stored in the \p LoadBytes bytes starting | ||||||||
| 2317 | /// at \p Src into \p IntVal, which is assumed to be wide enough and to hold zero. | ||||||||
| 2318 | void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes); | ||||||||
| 2319 | |||||||||
| 2320 | /// DenseMapInfo specialization so that APInt can be used as a DenseMap key. | ||||||||
| 2321 | template <> struct DenseMapInfo<APInt, void> { | ||||||||
| | /// Empty-key sentinel: a zero-bit-width APInt whose raw storage word is ~0ULL. | ||||||||
| 2322 | static inline APInt getEmptyKey() { | ||||||||
| 2323 | APInt Key(nullptr, 0); | ||||||||
| 2324 | Key.U.VAL = ~0ULL; | ||||||||
| 2325 | return Key; | ||||||||
| 2326 | } | ||||||||
| 2327 | |||||||||
| | /// Tombstone sentinel: a zero-bit-width APInt whose raw storage word is ~1ULL. | ||||||||
| 2328 | static inline APInt getTombstoneKey() { | ||||||||
| 2329 | APInt Key(nullptr, 0); | ||||||||
| 2330 | Key.U.VAL = ~1ULL; | ||||||||
| 2331 | return Key; | ||||||||
| 2332 | } | ||||||||
| 2333 | |||||||||
| | /// Hash of \p Key; declared here only. | ||||||||
| 2334 | static unsigned getHashValue(const APInt &Key); | ||||||||
| 2335 | |||||||||
| | /// Keys are equal only when the bit widths match and the values compare equal. | ||||||||
| | /// The width is checked first, so operator== is only reached for equal widths. | ||||||||
| 2336 | static bool isEqual(const APInt &LHS, const APInt &RHS) { | ||||||||
| | if (LHS.getBitWidth() != RHS.getBitWidth()) | ||||||||
| | return false; | ||||||||
| 2337 | return LHS == RHS; | ||||||||
| 2338 | } | ||||||||
| 2339 | }; | ||||||||
| 2340 | |||||||||
| 2341 | } // namespace llvm | ||||||||
| 2342 | |||||||||
| 2343 | #endif |