blob: 0e49984c6ee3332c7c3cb9889ecd3cc0e1f6d8ce [file] [log] [blame]
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Our algorithm works by first identifying a subset of nodes that must always
// be instrumented. We call these nodes ambiguous because knowing the coverage
// of all remaining nodes is not enough to infer their coverage status.
//
// In general a node v is ambiguous if there exists two entry-to-terminal paths
// P_1 and P_2 such that:
// 1. v not in P_1 but P_1 visits a predecessor of v, and
// 2. v not in P_2 but P_2 visits a successor of v.
//
// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
// coverage from the coverage of its predecessors, or if condition 2 fails, we
// can infer v’s coverage from the coverage of its successors.
//
// Sadly, there are example CFGs where it is not possible to infer all nodes
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
//
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
#define DEBUG_TYPE "pgo-block-coverage"
STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
STATISTIC(NumIneligibleFunctions,
"Number of functions for which BCI cannot run on");
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
STATISTIC(NumInstrumentedBlocks,
"Number of basic blocks instrumented for coverage");
BlockCoverageInference::BlockCoverageInference(const Function &F,
bool ForceInstrumentEntry)
: F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
findDependencies();
assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
++NumFunctions;
for (auto &BB : F) {
++NumBlocks;
if (shouldInstrumentBlock(BB))
++NumInstrumentedBlocks;
}
}
BlockCoverageInference::BlockSet
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
assert(BB.getParent() == &F);
BlockSet Dependencies;
auto It = PredecessorDependencies.find(&BB);
if (It != PredecessorDependencies.end())
Dependencies.set_union(It->second);
It = SuccessorDependencies.find(&BB);
if (It != SuccessorDependencies.end())
Dependencies.set_union(It->second);
return Dependencies;
}
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
JamCRC JC;
uint64_t Index = 0;
for (auto &BB : F) {
if (shouldInstrumentBlock(BB)) {
uint8_t Data[8];
support::endian::write64le(Data, Index);
JC.update(Data);
}
Index++;
}
return JC.getCRC();
}
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
assert(BB.getParent() == &F);
auto It = PredecessorDependencies.find(&BB);
if (It != PredecessorDependencies.end() && It->second.size())
return false;
It = SuccessorDependencies.find(&BB);
if (It != SuccessorDependencies.end() && It->second.size())
return false;
return true;
}
void BlockCoverageInference::findDependencies() {
assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
// Empirical analysis shows that this algorithm finishes within 5 seconds for
// functions with fewer than 1.5K blocks.
if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
++NumIneligibleFunctions;
return;
}
SmallVector<const BasicBlock *, 4> TerminalBlocks;
for (auto &BB : F)
if (succ_empty(&BB))
TerminalBlocks.push_back(&BB);
// Traverse the CFG backwards from the terminal blocks to make sure every
// block can reach some terminal block. Otherwise this algorithm will not work
// and we must fall back to instrumenting every block.
df_iterator_default_set<const BasicBlock *> Visited;
for (auto *BB : TerminalBlocks)
for (auto *N : inverse_depth_first_ext(BB, Visited))
(void)N;
if (F.size() != Visited.size()) {
++NumIneligibleFunctions;
return;
}
// The current implementation for computing `PredecessorDependencies` and
// `SuccessorDependencies` runs in quadratic time with respect to the number
// of basic blocks. While we do have a more complicated linear time algorithm
// in https://arxiv.org/abs/2208.13907 we do not know if it will give a
// significant speedup in practice given that most functions tend to be
// relatively small in size for intended use cases.
auto &EntryBlock = F.getEntryBlock();
for (auto &BB : F) {
// The set of blocks that are reachable while avoiding BB.
BlockSet ReachableFromEntry, ReachableFromTerminal;
getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
ReachableFromEntry);
for (auto *TerminalBlock : TerminalBlocks)
getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
ReachableFromTerminal);
auto Preds = predecessors(&BB);
bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
return ReachableFromEntry.count(Pred) &&
ReachableFromTerminal.count(Pred);
});
if (!HasSuperReachablePred)
for (auto *Pred : Preds)
if (ReachableFromEntry.count(Pred))
PredecessorDependencies[&BB].insert(Pred);
auto Succs = successors(&BB);
bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
return ReachableFromEntry.count(Succ) &&
ReachableFromTerminal.count(Succ);
});
if (!HasSuperReachableSucc)
for (auto *Succ : Succs)
if (ReachableFromTerminal.count(Succ))
SuccessorDependencies[&BB].insert(Succ);
}
if (ForceInstrumentEntry) {
// Force the entry block to be instrumented by clearing the blocks it can
// infer coverage from.
PredecessorDependencies[&EntryBlock].clear();
SuccessorDependencies[&EntryBlock].clear();
}
// Construct a graph where blocks are connected if there is a mutual
// dependency between them. This graph has a special property that it contains
// only paths.
DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
for (auto &BB : F) {
for (auto *Succ : successors(&BB)) {
if (SuccessorDependencies[&BB].count(Succ) &&
PredecessorDependencies[Succ].count(&BB)) {
AdjacencyList[&BB].insert(Succ);
AdjacencyList[Succ].insert(&BB);
}
}
}
// Given a path with at least one node, return the next node on the path.
auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
assert(Path.size());
auto &Neighbors = AdjacencyList[Path.back()];
if (Path.size() == 1) {
// This is the first node on the path, return its neighbor.
assert(Neighbors.size() == 1);
return Neighbors.front();
} else if (Neighbors.size() == 2) {
// This is the middle of the path, find the neighbor that is not on the
// path already.
assert(Path.size() >= 2);
return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
}
// This is the end of the path.
assert(Neighbors.size() == 1);
return nullptr;
};
// Remove all cycles in the inferencing graph.
for (auto &BB : F) {
if (AdjacencyList[&BB].size() == 1) {
// We found the head of some path.
BlockSet Path;
Path.insert(&BB);
while (const BasicBlock *Next = getNextOnPath(Path))
Path.insert(Next);
LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
// Remove these nodes from the graph so we don't discover this path again.
for (auto *BB : Path)
AdjacencyList[BB].clear();
// Finally, remove the cycles.
if (PredecessorDependencies[Path.front()].size()) {
for (auto *BB : Path)
if (BB != Path.back())
SuccessorDependencies[BB].clear();
} else {
for (auto *BB : Path)
if (BB != Path.front())
PredecessorDependencies[BB].clear();
}
}
}
LLVM_DEBUG(dump(dbgs()));
}
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
const BasicBlock &Avoid,
bool IsForward,
BlockSet &Reachable) const {
df_iterator_default_set<const BasicBlock *> Visited;
Visited.insert(&Avoid);
if (IsForward) {
auto Range = depth_first_ext(&Start, Visited);
Reachable.insert(Range.begin(), Range.end());
} else {
auto Range = inverse_depth_first_ext(&Start, Visited);
Reachable.insert(Range.begin(), Range.end());
}
}
namespace llvm {
class DotFuncBCIInfo {
private:
const BlockCoverageInference *BCI;
const DenseMap<const BasicBlock *, bool> *Coverage;
public:
DotFuncBCIInfo(const BlockCoverageInference *BCI,
const DenseMap<const BasicBlock *, bool> *Coverage)
: BCI(BCI), Coverage(Coverage) {}
const Function &getFunction() { return BCI->F; }
bool isInstrumented(const BasicBlock *BB) const {
return BCI->shouldInstrumentBlock(*BB);
}
bool isCovered(const BasicBlock *BB) const {
return Coverage && Coverage->lookup(BB);
}
bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
return BCI->getDependencies(*Src).count(Dest);
}
};
template <>
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
return &(Info->getFunction().getEntryBlock());
}
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
using nodes_iterator = pointer_iterator<Function::const_iterator>;
static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
return nodes_iterator(Info->getFunction().begin());
}
static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
return nodes_iterator(Info->getFunction().end());
}
static size_t size(DotFuncBCIInfo *Info) {
return Info->getFunction().size();
}
};
template <>
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
static std::string getGraphName(DotFuncBCIInfo *Info) {
return "BCI CFG for " + Info->getFunction().getName().str();
}
std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
return Node->getName().str();
}
std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
DotFuncBCIInfo *Info) {
const BasicBlock *Dest = *I;
if (Info->isDependent(Src, Dest))
return "color=red";
if (Info->isDependent(Dest, Src))
return "color=blue";
return "";
}
std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
std::string Result;
if (Info->isInstrumented(Node))
Result += "style=filled,fillcolor=gray";
if (Info->isCovered(Node))
Result += std::string(Result.empty() ? "" : ",") + "color=red";
return Result;
}
};
} // namespace llvm
void BlockCoverageInference::viewBlockCoverageGraph(
const DenseMap<const BasicBlock *, bool> *Coverage) const {
DotFuncBCIInfo Info(this, Coverage);
WriteGraph(&Info, "BCI", false,
"Block Coverage Inference for " + F.getName());
}
void BlockCoverageInference::dump(raw_ostream &OS) const {
OS << "Minimal block coverage for function \'" << F.getName()
<< "\' (Instrumented=*)\n";
for (auto &BB : F) {
OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";
auto It = PredecessorDependencies.find(&BB);
if (It != PredecessorDependencies.end() && It->second.size())
OS << " PredDeps = " << getBlockNames(It->second) << "\n";
It = SuccessorDependencies.find(&BB);
if (It != SuccessorDependencies.end() && It->second.size())
OS << " SuccDeps = " << getBlockNames(It->second) << "\n";
}
OS << " Instrumented Blocks Hash = 0x"
<< Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
}
std::string
BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
std::string Result;
raw_string_ostream OS(Result);
OS << "[";
if (!BBs.empty()) {
OS << BBs.front()->getName();
BBs = BBs.drop_front();
}
for (auto *BB : BBs)
OS << ", " << BB->getName();
OS << "]";
return OS.str();
}