LLVM 19.0.0git
Lint.cpp
Go to the documentation of this file.
1//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass statically checks for common and easily-identified constructs
10// which produce undefined or likely unintended behavior in LLVM IR.
11//
12// It is not a guarantee of correctness, in two ways. First, it isn't
13// comprehensive. There are checks which could be done statically which are
14// not yet implemented. Some of these are indicated by TODO comments, but
15// those aren't comprehensive either. Second, many conditions cannot be
16// checked statically. This pass does no dynamic instrumentation, so it
17// can't check for all possible problems.
18//
19// Another limitation is that it assumes all code will be executed. A store
20// through a null pointer in a basic block which is never reached is harmless,
21// but this pass will warn about it anyway. This is the main reason why most
22// of these checks live here instead of in the Verifier pass.
23//
24// Optimization passes may make conditions that this pass checks for more or
25// less obvious. If an optimization pass appears to be introducing a warning,
26// it may be that the optimization pass is merely exposing an existing
27// condition in the code.
28//
29// This code may be run before instcombine. In many cases, instcombine checks
30// for the same kinds of things and turns instructions with undefined behavior
31// into unreachable (or equivalent). Because of this, this pass makes some
32// effort to look through bitcasts and so on.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/Analysis/Lint.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
40#include "llvm/ADT/Twine.h"
46#include "llvm/Analysis/Loads.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
55#include "llvm/IR/Constants.h"
56#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/Dominators.h"
59#include "llvm/IR/Function.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/InstrTypes.h"
63#include "llvm/IR/Instruction.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/PassManager.h"
68#include "llvm/IR/Type.h"
69#include "llvm/IR/Value.h"
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <string>
77
78using namespace llvm;
79
80static const char LintAbortOnErrorArgName[] = "lint-abort-on-error";
81static cl::opt<bool>
83 cl::desc("In the Lint pass, abort on errors."));
84
85namespace {
86namespace MemRef {
87static const unsigned Read = 1;
88static const unsigned Write = 2;
89static const unsigned Callee = 4;
90static const unsigned Branchee = 8;
91} // end namespace MemRef
92
93class Lint : public InstVisitor<Lint> {
94 friend class InstVisitor<Lint>;
95
97
98 void visitCallBase(CallBase &CB);
99 void visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
100 MaybeAlign Alignment, Type *Ty, unsigned Flags);
101
103 void visitLoadInst(LoadInst &I);
105 void visitXor(BinaryOperator &I);
106 void visitSub(BinaryOperator &I);
107 void visitLShr(BinaryOperator &I);
108 void visitAShr(BinaryOperator &I);
109 void visitShl(BinaryOperator &I);
110 void visitSDiv(BinaryOperator &I);
111 void visitUDiv(BinaryOperator &I);
112 void visitSRem(BinaryOperator &I);
113 void visitURem(BinaryOperator &I);
120
121 Value *findValue(Value *V, bool OffsetOk) const;
122 Value *findValueImpl(Value *V, bool OffsetOk,
123 SmallPtrSetImpl<Value *> &Visited) const;
124
125public:
126 Module *Mod;
127 const DataLayout *DL;
128 AliasAnalysis *AA;
129 AssumptionCache *AC;
130 DominatorTree *DT;
132
133 std::string Messages;
134 raw_string_ostream MessagesStr;
135
136 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA,
138 : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI),
139 MessagesStr(Messages) {}
140
141 void WriteValues(ArrayRef<const Value *> Vs) {
142 for (const Value *V : Vs) {
143 if (!V)
144 continue;
145 if (isa<Instruction>(V)) {
146 MessagesStr << *V << '\n';
147 } else {
148 V->printAsOperand(MessagesStr, true, Mod);
149 MessagesStr << '\n';
150 }
151 }
152 }
153
154 /// A check failed, so printout out the condition and the message.
155 ///
156 /// This provides a nice place to put a breakpoint if you want to see why
157 /// something is not correct.
158 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
159
160 /// A check failed (with values to print).
161 ///
162 /// This calls the Message-only version so that the above is easier to set
163 /// a breakpoint on.
164 template <typename T1, typename... Ts>
165 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
166 CheckFailed(Message);
167 WriteValues({V1, Vs...});
168 }
169};
170} // end anonymous namespace
171
172// Check - We know that cond should be true, if not print an error message.
173#define Check(C, ...) \
174 do { \
175 if (!(C)) { \
176 CheckFailed(__VA_ARGS__); \
177 return; \
178 } \
179 } while (false)
180
181void Lint::visitFunction(Function &F) {
182 // This isn't undefined behavior, it's just a little unusual, and it's a
183 // fairly common mistake to neglect to name a function.
184 Check(F.hasName() || F.hasLocalLinkage(),
185 "Unusual: Unnamed function with non-local linkage", &F);
186
187 // TODO: Check for irreducible control flow.
188}
189
190void Lint::visitCallBase(CallBase &I) {
191 Value *Callee = I.getCalledOperand();
192
193 visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt,
194 nullptr, MemRef::Callee);
195
196 if (Function *F = dyn_cast<Function>(findValue(Callee,
197 /*OffsetOk=*/false))) {
198 Check(I.getCallingConv() == F->getCallingConv(),
199 "Undefined behavior: Caller and callee calling convention differ",
200 &I);
201
202 FunctionType *FT = F->getFunctionType();
203 unsigned NumActualArgs = I.arg_size();
204
205 Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
206 : FT->getNumParams() == NumActualArgs,
207 "Undefined behavior: Call argument count mismatches callee "
208 "argument count",
209 &I);
210
211 Check(FT->getReturnType() == I.getType(),
212 "Undefined behavior: Call return type mismatches "
213 "callee return type",
214 &I);
215
216 // Check argument types (in case the callee was casted) and attributes.
217 // TODO: Verify that caller and callee attributes are compatible.
218 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
219 auto AI = I.arg_begin(), AE = I.arg_end();
220 for (; AI != AE; ++AI) {
221 Value *Actual = *AI;
222 if (PI != PE) {
223 Argument *Formal = &*PI++;
224 Check(Formal->getType() == Actual->getType(),
225 "Undefined behavior: Call argument type mismatches "
226 "callee parameter type",
227 &I);
228
229 // Check that noalias arguments don't alias other arguments. This is
230 // not fully precise because we don't know the sizes of the dereferenced
231 // memory regions.
232 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
233 AttributeList PAL = I.getAttributes();
234 unsigned ArgNo = 0;
235 for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
236 // Skip ByVal arguments since they will be memcpy'd to the callee's
237 // stack so we're not really passing the pointer anyway.
238 if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
239 continue;
240 // If both arguments are readonly, they have no dependence.
241 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
242 continue;
243 // Skip readnone arguments since those are guaranteed not to be
244 // dereferenced anyway.
245 if (I.doesNotAccessMemory(ArgNo))
246 continue;
247 if (AI != BI && (*BI)->getType()->isPointerTy()) {
248 AliasResult Result = AA->alias(*AI, *BI);
249 Check(Result != AliasResult::MustAlias &&
251 "Unusual: noalias argument aliases another argument", &I);
252 }
253 }
254 }
255
256 // Check that an sret argument points to valid memory.
257 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
258 Type *Ty = Formal->getParamStructRetType();
259 MemoryLocation Loc(
260 Actual, LocationSize::precise(DL->getTypeStoreSize(Ty)));
261 visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty,
262 MemRef::Read | MemRef::Write);
263 }
264 }
265 }
266 }
267
268 if (const auto *CI = dyn_cast<CallInst>(&I)) {
269 if (CI->isTailCall()) {
270 const AttributeList &PAL = CI->getAttributes();
271 unsigned ArgNo = 0;
272 for (Value *Arg : I.args()) {
273 // Skip ByVal arguments since they will be memcpy'd to the callee's
274 // stack anyway.
275 if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
276 continue;
277 Value *Obj = findValue(Arg, /*OffsetOk=*/true);
278 Check(!isa<AllocaInst>(Obj),
279 "Undefined behavior: Call with \"tail\" keyword references "
280 "alloca",
281 &I);
282 }
283 }
284 }
285
286 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
287 switch (II->getIntrinsicID()) {
288 default:
289 break;
290
291 // TODO: Check more intrinsics
292
293 case Intrinsic::memcpy: {
294 MemCpyInst *MCI = cast<MemCpyInst>(&I);
295 visitMemoryReference(I, MemoryLocation::getForDest(MCI),
296 MCI->getDestAlign(), nullptr, MemRef::Write);
297 visitMemoryReference(I, MemoryLocation::getForSource(MCI),
298 MCI->getSourceAlign(), nullptr, MemRef::Read);
299
300 // Check that the memcpy arguments don't overlap. The AliasAnalysis API
301 // isn't expressive enough for what we really want to do. Known partial
302 // overlap is not distinguished from the case where nothing is known.
304 if (const ConstantInt *Len =
305 dyn_cast<ConstantInt>(findValue(MCI->getLength(),
306 /*OffsetOk=*/false)))
307 if (Len->getValue().isIntN(32))
308 Size = LocationSize::precise(Len->getValue().getZExtValue());
309 Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
311 "Undefined behavior: memcpy source and destination overlap", &I);
312 break;
313 }
314 case Intrinsic::memcpy_inline: {
315 MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
316 const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
317 visitMemoryReference(I, MemoryLocation::getForDest(MCII),
318 MCII->getDestAlign(), nullptr, MemRef::Write);
319 visitMemoryReference(I, MemoryLocation::getForSource(MCII),
320 MCII->getSourceAlign(), nullptr, MemRef::Read);
321
322 // Check that the memcpy arguments don't overlap. The AliasAnalysis API
323 // isn't expressive enough for what we really want to do. Known partial
324 // overlap is not distinguished from the case where nothing is known.
326 Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
328 "Undefined behavior: memcpy source and destination overlap", &I);
329 break;
330 }
331 case Intrinsic::memmove: {
332 MemMoveInst *MMI = cast<MemMoveInst>(&I);
333 visitMemoryReference(I, MemoryLocation::getForDest(MMI),
334 MMI->getDestAlign(), nullptr, MemRef::Write);
335 visitMemoryReference(I, MemoryLocation::getForSource(MMI),
336 MMI->getSourceAlign(), nullptr, MemRef::Read);
337 break;
338 }
339 case Intrinsic::memset: {
340 MemSetInst *MSI = cast<MemSetInst>(&I);
341 visitMemoryReference(I, MemoryLocation::getForDest(MSI),
342 MSI->getDestAlign(), nullptr, MemRef::Write);
343 break;
344 }
345 case Intrinsic::memset_inline: {
346 MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
347 visitMemoryReference(I, MemoryLocation::getForDest(MSII),
348 MSII->getDestAlign(), nullptr, MemRef::Write);
349 break;
350 }
351
352 case Intrinsic::vastart:
353 Check(I.getParent()->getParent()->isVarArg(),
354 "Undefined behavior: va_start called in a non-varargs function",
355 &I);
356
357 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
358 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
359 break;
360 case Intrinsic::vacopy:
361 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
362 std::nullopt, nullptr, MemRef::Write);
363 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI),
364 std::nullopt, nullptr, MemRef::Read);
365 break;
366 case Intrinsic::vaend:
367 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
368 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
369 break;
370
371 case Intrinsic::stackrestore:
372 // Stackrestore doesn't read or write memory, but it sets the
373 // stack pointer, which the compiler may read from or write to
374 // at any time, so check it for both readability and writeability.
375 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
376 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
377 break;
378 case Intrinsic::get_active_lane_mask:
379 if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
380 Check(!TripCount->isZero(),
381 "get_active_lane_mask: operand #2 "
382 "must be greater than 0",
383 &I);
384 break;
385 }
386}
387
388void Lint::visitReturnInst(ReturnInst &I) {
389 Function *F = I.getParent()->getParent();
390 Check(!F->doesNotReturn(),
391 "Unusual: Return statement in function with noreturn attribute", &I);
392
393 if (Value *V = I.getReturnValue()) {
394 Value *Obj = findValue(V, /*OffsetOk=*/true);
395 Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
396 }
397}
398
399// TODO: Check that the reference is in bounds.
400// TODO: Check readnone/readonly function attributes.
401void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
402 MaybeAlign Align, Type *Ty, unsigned Flags) {
403 // If no memory is being referenced, it doesn't matter if the pointer
404 // is valid.
405 if (Loc.Size.isZero())
406 return;
407
408 Value *Ptr = const_cast<Value *>(Loc.Ptr);
409 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
410 Check(!isa<ConstantPointerNull>(UnderlyingObject),
411 "Undefined behavior: Null pointer dereference", &I);
412 Check(!isa<UndefValue>(UnderlyingObject),
413 "Undefined behavior: Undef pointer dereference", &I);
414 Check(!isa<ConstantInt>(UnderlyingObject) ||
415 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
416 "Unusual: All-ones pointer dereference", &I);
417 Check(!isa<ConstantInt>(UnderlyingObject) ||
418 !cast<ConstantInt>(UnderlyingObject)->isOne(),
419 "Unusual: Address one pointer dereference", &I);
420
421 if (Flags & MemRef::Write) {
422 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
423 Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
424 &I);
425 Check(!isa<Function>(UnderlyingObject) &&
426 !isa<BlockAddress>(UnderlyingObject),
427 "Undefined behavior: Write to text section", &I);
428 }
429 if (Flags & MemRef::Read) {
430 Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
431 &I);
432 Check(!isa<BlockAddress>(UnderlyingObject),
433 "Undefined behavior: Load from block address", &I);
434 }
435 if (Flags & MemRef::Callee) {
436 Check(!isa<BlockAddress>(UnderlyingObject),
437 "Undefined behavior: Call to block address", &I);
438 }
439 if (Flags & MemRef::Branchee) {
440 Check(!isa<Constant>(UnderlyingObject) ||
441 isa<BlockAddress>(UnderlyingObject),
442 "Undefined behavior: Branch to non-blockaddress", &I);
443 }
444
445 // Check for buffer overflows and misalignment.
446 // Only handles memory references that read/write something simple like an
447 // alloca instruction or a global variable.
448 int64_t Offset = 0;
450 // OK, so the access is to a constant offset from Ptr. Check that Ptr is
451 // something we can handle and if so extract the size of this base object
452 // along with its alignment.
454 MaybeAlign BaseAlign;
455
456 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
457 Type *ATy = AI->getAllocatedType();
458 if (!AI->isArrayAllocation() && ATy->isSized())
459 BaseSize = DL->getTypeAllocSize(ATy);
460 BaseAlign = AI->getAlign();
461 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
462 // If the global may be defined differently in another compilation unit
463 // then don't warn about funky memory accesses.
464 if (GV->hasDefinitiveInitializer()) {
465 Type *GTy = GV->getValueType();
466 if (GTy->isSized())
467 BaseSize = DL->getTypeAllocSize(GTy);
468 BaseAlign = GV->getAlign();
469 if (!BaseAlign && GTy->isSized())
470 BaseAlign = DL->getABITypeAlign(GTy);
471 }
472 }
473
474 // Accesses from before the start or after the end of the object are not
475 // defined.
476 Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
477 (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
478 "Undefined behavior: Buffer overflow", &I);
479
480 // Accesses that say that the memory is more aligned than it is are not
481 // defined.
482 if (!Align && Ty && Ty->isSized())
483 Align = DL->getABITypeAlign(Ty);
484 if (BaseAlign && Align)
485 Check(*Align <= commonAlignment(*BaseAlign, Offset),
486 "Undefined behavior: Memory reference address is misaligned", &I);
487 }
488}
489
490void Lint::visitLoadInst(LoadInst &I) {
491 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(),
492 MemRef::Read);
493}
494
495void Lint::visitStoreInst(StoreInst &I) {
496 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
497 I.getOperand(0)->getType(), MemRef::Write);
498}
499
500void Lint::visitXor(BinaryOperator &I) {
501 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
502 "Undefined result: xor(undef, undef)", &I);
503}
504
505void Lint::visitSub(BinaryOperator &I) {
506 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
507 "Undefined result: sub(undef, undef)", &I);
508}
509
510void Lint::visitLShr(BinaryOperator &I) {
511 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
512 /*OffsetOk=*/false)))
513 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
514 "Undefined result: Shift count out of range", &I);
515}
516
517void Lint::visitAShr(BinaryOperator &I) {
518 if (ConstantInt *CI =
519 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
520 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
521 "Undefined result: Shift count out of range", &I);
522}
523
524void Lint::visitShl(BinaryOperator &I) {
525 if (ConstantInt *CI =
526 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
527 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
528 "Undefined result: Shift count out of range", &I);
529}
530
531static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
532 AssumptionCache *AC) {
533 // Assume undef could be zero.
534 if (isa<UndefValue>(V))
535 return true;
536
537 VectorType *VecTy = dyn_cast<VectorType>(V->getType());
538 if (!VecTy) {
539 KnownBits Known =
540 computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
541 return Known.isZero();
542 }
543
544 // Per-component check doesn't work with zeroinitializer
545 Constant *C = dyn_cast<Constant>(V);
546 if (!C)
547 return false;
548
549 if (C->isZeroValue())
550 return true;
551
552 // For a vector, KnownZero will only be true if all values are zero, so check
553 // this per component
554 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements();
555 I != N; ++I) {
556 Constant *Elem = C->getAggregateElement(I);
557 if (isa<UndefValue>(Elem))
558 return true;
559
560 KnownBits Known = computeKnownBits(Elem, DL);
561 if (Known.isZero())
562 return true;
563 }
564
565 return false;
566}
567
568void Lint::visitSDiv(BinaryOperator &I) {
569 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
570 "Undefined behavior: Division by zero", &I);
571}
572
573void Lint::visitUDiv(BinaryOperator &I) {
574 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
575 "Undefined behavior: Division by zero", &I);
576}
577
578void Lint::visitSRem(BinaryOperator &I) {
579 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
580 "Undefined behavior: Division by zero", &I);
581}
582
583void Lint::visitURem(BinaryOperator &I) {
584 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
585 "Undefined behavior: Division by zero", &I);
586}
587
588void Lint::visitAllocaInst(AllocaInst &I) {
589 if (isa<ConstantInt>(I.getArraySize()))
590 // This isn't undefined behavior, it's just an obvious pessimization.
591 Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
592 "Pessimization: Static alloca outside of entry block", &I);
593
594 // TODO: Check for an unusual size (MSB set?)
595}
596
597void Lint::visitVAArgInst(VAArgInst &I) {
598 visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr,
599 MemRef::Read | MemRef::Write);
600}
601
602void Lint::visitIndirectBrInst(IndirectBrInst &I) {
603 visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()),
604 std::nullopt, nullptr, MemRef::Branchee);
605
606 Check(I.getNumDestinations() != 0,
607 "Undefined behavior: indirectbr with no destinations", &I);
608}
609
610void Lint::visitExtractElementInst(ExtractElementInst &I) {
611 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
612 /*OffsetOk=*/false)))
613 Check(
614 CI->getValue().ult(
615 cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()),
616 "Undefined result: extractelement index out of range", &I);
617}
618
619void Lint::visitInsertElementInst(InsertElementInst &I) {
620 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
621 /*OffsetOk=*/false)))
622 Check(CI->getValue().ult(
623 cast<FixedVectorType>(I.getType())->getNumElements()),
624 "Undefined result: insertelement index out of range", &I);
625}
626
627void Lint::visitUnreachableInst(UnreachableInst &I) {
628 // This isn't undefined behavior, it's merely suspicious.
629 Check(&I == &I.getParent()->front() ||
630 std::prev(I.getIterator())->mayHaveSideEffects(),
631 "Unusual: unreachable immediately preceded by instruction without "
632 "side effects",
633 &I);
634}
635
636/// findValue - Look through bitcasts and simple memory reference patterns
637/// to identify an equivalent, but more informative, value. If OffsetOk
638/// is true, look through getelementptrs with non-zero offsets too.
639///
640/// Most analysis passes don't require this logic, because instcombine
641/// will simplify most of these kinds of things away. But it's a goal of
642/// this Lint pass to be useful even on non-optimized IR.
643Value *Lint::findValue(Value *V, bool OffsetOk) const {
645 return findValueImpl(V, OffsetOk, Visited);
646}
647
648/// findValueImpl - Implementation helper for findValue.
649Value *Lint::findValueImpl(Value *V, bool OffsetOk,
650 SmallPtrSetImpl<Value *> &Visited) const {
651 // Detect self-referential values.
652 if (!Visited.insert(V).second)
653 return UndefValue::get(V->getType());
654
655 // TODO: Look through sext or zext cast, when the result is known to
656 // be interpreted as signed or unsigned, respectively.
657 // TODO: Look through eliminable cast pairs.
658 // TODO: Look through calls with unique return values.
659 // TODO: Look through vector insert/extract/shuffle.
660 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts();
661 if (LoadInst *L = dyn_cast<LoadInst>(V)) {
662 BasicBlock::iterator BBI = L->getIterator();
663 BasicBlock *BB = L->getParent();
664 SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
665 BatchAAResults BatchAA(*AA);
666 for (;;) {
667 if (!VisitedBlocks.insert(BB).second)
668 break;
669 if (Value *U =
670 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
671 return findValueImpl(U, OffsetOk, Visited);
672 if (BBI != BB->begin())
673 break;
674 BB = BB->getUniquePredecessor();
675 if (!BB)
676 break;
677 BBI = BB->end();
678 }
679 } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
680 if (Value *W = PN->hasConstantValue())
681 return findValueImpl(W, OffsetOk, Visited);
682 } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
683 if (CI->isNoopCast(*DL))
684 return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
685 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
686 if (Value *W =
687 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices()))
688 if (W != V)
689 return findValueImpl(W, OffsetOk, Visited);
690 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
691 // Same as above, but for ConstantExpr instead of Instruction.
692 if (Instruction::isCast(CE->getOpcode())) {
694 CE->getOperand(0)->getType(), CE->getType(),
695 *DL))
696 return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
697 }
698 }
699
700 // As a last resort, try SimplifyInstruction or constant folding.
701 if (Instruction *Inst = dyn_cast<Instruction>(V)) {
702 if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC}))
703 return findValueImpl(W, OffsetOk, Visited);
704 } else if (auto *C = dyn_cast<Constant>(V)) {
705 Value *W = ConstantFoldConstant(C, *DL, TLI);
706 if (W != V)
707 return findValueImpl(W, OffsetOk, Visited);
708 }
709
710 return V;
711}
712
714 auto *Mod = F.getParent();
715 auto *DL = &F.getParent()->getDataLayout();
716 auto *AA = &AM.getResult<AAManager>(F);
717 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
718 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
719 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
720 Lint L(Mod, DL, AA, AC, DT, TLI);
721 L.visit(F);
722 dbgs() << L.MessagesStr.str();
723 if (LintAbortOnError && !L.MessagesStr.str().empty())
724 report_fatal_error(Twine("Linter found errors, aborting. (enabled by --") +
726 false);
727 return PreservedAnalyses::all();
728}
729
730//===----------------------------------------------------------------------===//
731// Implement the public interfaces to this file...
732//===----------------------------------------------------------------------===//
733
734/// lintFunction - Check a function for errors, printing messages on stderr.
735///
737 Function &F = const_cast<Function &>(f);
738 assert(!F.isDeclaration() && "Cannot lint external functions");
739
741 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
742 FAM.registerPass([&] { return DominatorTreeAnalysis(); });
743 FAM.registerPass([&] { return AssumptionAnalysis(); });
744 FAM.registerPass([&] {
745 AAManager AA;
749 return AA;
750 });
751 LintPass().run(F, FAM);
752}
753
754/// lintModule - Check a module for errors, printing messages on stderr.
755///
756void llvm::lintModule(const Module &M) {
757 for (const Function &F : M) {
758 if (!F.isDeclaration())
760 }
761}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
This is the interface for LLVM's primary stateless and local alias analysis.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Size
#define Check(C,...)
Definition: Lint.cpp:173
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:531
static const char LintAbortOnErrorArgName[]
Definition: Lint.cpp:80
static cl::opt< bool > LintAbortOnError(LintAbortOnErrorArgName, cl::init(false), cl::desc("In the Lint pass, abort on errors."))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides utility analysis objects describing memory locations.
#define T1
Module.h This file contains the declarations for the Module class.
Module * Mod
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is the interface for a metadata-based scoped no-alias analysis.
This file defines the SmallPtrSet class.
This is the interface for a metadata-based TBAA.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
The possible results of an alias query.
Definition: AliasAnalysis.h:81
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
an instruction to allocate memory on the stack
Definition: Instructions.h:59
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:562
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:264
bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
Definition: Function.cpp:300
Type * getParamStructRetType() const
If this is an sret argument, return its type.
Definition: Function.cpp:228
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
Definition: Function.cpp:279
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the attribute exists for the given argument.
Definition: Attributes.h:783
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
Analysis pass providing a never-invalidated alias analysis result.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:447
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1455
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
static bool isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DstTy, const DataLayout &DL)
A no-op cast is one that can be effected without changing any bits.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
Indirect Branch Instruction.
This instruction inserts a single (scalar) element into a VectorType value.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitIndirectBrInst(IndirectBrInst &I)
Definition: InstVisitor.h:235
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
void visitFunction(Function &F)
Definition: InstVisitor.h:142
RetTy visitUnreachableInst(UnreachableInst &I)
Definition: InstVisitor.h:241
RetTy visitReturnInst(ReturnInst &I)
Definition: InstVisitor.h:226
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitAllocaInst(AllocaInst &I)
Definition: InstVisitor.h:168
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
RetTy visitVAArgInst(VAArgInst &I)
Definition: InstVisitor.h:190
bool isCast() const
Definition: Instruction.h:259
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: Lint.cpp:713
An instruction for reading from memory.
Definition: Instructions.h:184
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
bool isZero() const
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
This class wraps the llvm.memcpy.inline intrinsic.
ConstantInt * getLength() const
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset.inline intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
Return a value (possibly void), from a function.
Analysis pass providing a never-invalidated alias analysis result.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
An instruction for storing to memory.
Definition: Instructions.h:317
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Analysis pass providing a never-invalidated alias analysis result.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
This function has undefined behavior.
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
Definition: Lint.cpp:86
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:453
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void lintModule(const Module &M)
Lint a module.
Definition: Lint.cpp:756
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
@ Mod
The access may modify the value stored in memory.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Value * FindInsertedValue(Value *V, ArrayRef< unsigned > idx_range, std::optional< BasicBlock::iterator > InsertBefore=std::nullopt)
Given an aggregate and an sequence of indices, see if the scalar value indexed is already around as a...
void lintFunction(const Function &F)
lintFunction - Check a function for errors, printing messages on stderr.
Definition: Lint.cpp:736
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:77
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117