From db7f7558a36f56693ae459abdf77ca8f7eff9ca7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 27 Mar 2026 16:37:28 +0100 Subject: [PATCH 01/29] Start adding MonoIFDS to phasar --- include/phasar/ControlFlow/CFG.h | 9 + include/phasar/ControlFlow/ControlFlowOrder.h | 49 ++ .../DataFlow/MonoIfds/ArraySetWorkList.h | 29 + .../DataFlow/MonoIfds/DataFlowEnvironment.h | 34 ++ .../DataFlow/MonoIfds/IterationStrategy.h | 41 ++ .../phasar/DataFlow/MonoIfds/MonoIFDSConfig.h | 22 + .../DataFlow/MonoIfds/MonoIFDSProblem.h | 49 ++ .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 577 ++++++++++++++++++ .../phasar/DataFlow/MonoIfds/RPOWorkList.h | 59 ++ include/phasar/Utils/ArraySet.h | 100 +++ include/phasar/Utils/FunctionCompressor.h | 25 + include/phasar/Utils/Lazy.h | 38 ++ include/phasar/Utils/StrongTypeDef.h | 96 +++ include/phasar/Utils/UsedGlobalsHolder.h | 27 + lib/ControlFlow/ControlFlowOrder.cpp | 182 ++++++ lib/Utils/FunctionCompressor.cpp | 7 + .../DataFlow/IfdsIde/CMakeLists.txt | 1 + .../DataFlow/IfdsIde/MonoIFDSSolverTest.cpp | 1 + 18 files changed, 1346 insertions(+) create mode 100644 include/phasar/ControlFlow/ControlFlowOrder.h create mode 100644 include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h create mode 100644 include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h create mode 100644 include/phasar/DataFlow/MonoIfds/IterationStrategy.h create mode 100644 include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h create mode 100644 include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h create mode 100644 include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h create mode 100644 include/phasar/DataFlow/MonoIfds/RPOWorkList.h create mode 100644 include/phasar/Utils/ArraySet.h create mode 100644 include/phasar/Utils/FunctionCompressor.h create mode 100644 include/phasar/Utils/Lazy.h create mode 100644 include/phasar/Utils/StrongTypeDef.h create mode 100644 include/phasar/Utils/UsedGlobalsHolder.h create mode 100644 lib/ControlFlow/ControlFlowOrder.cpp create mode 
100644 lib/Utils/FunctionCompressor.cpp create mode 100644 unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp diff --git a/include/phasar/ControlFlow/CFG.h b/include/phasar/ControlFlow/CFG.h index 3200b0c17a..da8f7e77bc 100644 --- a/include/phasar/ControlFlow/CFG.h +++ b/include/phasar/ControlFlow/CFG.h @@ -8,6 +8,7 @@ *****************************************************************************/ #pragma once +#include "phasar/Utils/Nullable.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/Support/raw_ostream.h" @@ -87,4 +88,12 @@ concept CFGEdgesProvider = requires(const T &CF, typename T::f_t Fun) { CF.getAllControlFlowEdges(Fun) } -> psr::is_iterable_over_v>; }; + +template +concept IsBlockAwareControlFlow = requires(const T &CF, typename T::n_t Inst) { + { + CF.getUniqueSuccessor(Inst) + } -> std::convertible_to>; + { CF.hasUniquePredecessor(Inst) } -> std::convertible_to; +}; } // namespace psr diff --git a/include/phasar/ControlFlow/ControlFlowOrder.h b/include/phasar/ControlFlow/ControlFlowOrder.h new file mode 100644 index 0000000000..74a8c19aab --- /dev/null +++ b/include/phasar/ControlFlow/ControlFlowOrder.h @@ -0,0 +1,49 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/IR/Instruction.h" + +#include + +namespace psr { + +struct ControlFlowOrder { + enum class CFGOrderId : uint32_t {}; + + Compressor Order; + + // Iteration visits Order back to front: the reversed range has to start at + // Order.end() and stop at Order.begin(). + [[nodiscard]] auto begin() const noexcept { + return std::make_reverse_iterator(Order.end()); + } + [[nodiscard]] auto end() const noexcept { + return std::make_reverse_iterator(Order.begin()); + } +}; + +// TODO: Make this independent from LLVM IR +void computeCFGOrder(ControlFlowOrder &Into, const llvm::Function *Fun); +void computeCFGOrder( + ControlFlowOrder &Into, const SCCHolder &SCCs, + SCCId SCC, const psr::LLVMBasedCallGraph &CG, + const Compressor &Functions); + +[[nodiscard]] inline ControlFlowOrder +computeCFGOrder(const llvm::Function *Fun) { + ControlFlowOrder Ret; + computeCFGOrder(Ret, Fun); + return Ret; +} +} // namespace psr diff --git a/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h b/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h new file mode 100644 index 0000000000..294f866e2e --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h @@ -0,0 +1,29 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt.
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/Utils/ArraySet.h" + +namespace psr::monoifds { +template class ArraySetDriver { +public: + void push(ItemT Item) { WL.insert(std::move(Item)); } + + template + LLVM_ATTRIBUTE_ALWAYS_INLINE void run(HandlerT Handler) { + WL.foreach (std::move(Handler)); + } + + [[nodiscard]] constexpr bool empty() const noexcept { return WL.empty(); } + +private: + ArraySet WL; +}; +} // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h new file mode 100644 index 0000000000..8deb2f11d2 --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h @@ -0,0 +1,34 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/Utils/BitSet.h" +#include "phasar/Utils/StrongTypeDef.h" + +#include "llvm/ADT/DenseMap.h" + +PHASAR_STRONG_TYPEDEF(psr::monoifds, uint32_t, SourceFactId); + +namespace psr::monoifds { + +using SourceFactSet = BitSet; + +/// The local analysis state: TargetFact-->{SourceFact} + +template +struct DataFlowEnvironment : llvm::SmallDenseMap { + using llvm::SmallDenseMap::SmallDenseMap; + + // For env-versioning + uint32_t AnalyzedVersion = 0; + uint32_t Version = 1; +}; + +} // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/IterationStrategy.h b/include/phasar/DataFlow/MonoIfds/IterationStrategy.h new file mode 100644 index 0000000000..159c6c9b36 --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/IterationStrategy.h @@ -0,0 +1,41 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" + +#include + +namespace psr::monoifds { +enum class IterationStrategy : uint8_t { + DedupFIFOQueue, + TopoPrioQueue, + Hybrid, + HybridCapped, +}; + +[[nodiscard]] constexpr llvm::StringRef +to_string(IterationStrategy IterStrategy) noexcept { + switch (IterStrategy) { + case IterationStrategy::DedupFIFOQueue: + return "queue"; + case IterationStrategy::TopoPrioQueue: + return "topo"; + case IterationStrategy::Hybrid: + return "hybrid"; + case IterationStrategy::HybridCapped: + return "hybrid-capped"; + } + llvm_unreachable("All valid IterationStrategy alternatives should be handled " + "in the switch above"); +} + +} // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h new file mode 100644 index 0000000000..6a09815030 --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h @@ -0,0 +1,22 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/DataFlow/MonoIfds/IterationStrategy.h" + +namespace psr::monoifds { +struct MonoIfdsConfig { + IterationStrategy IterStrategy = IterationStrategy::DedupFIFOQueue; + bool EnableAggressiveLoopPriorization = false; + bool EnableEnvVersioning = false; + bool EagerReturnPropagation = false; +}; + +} // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h new file mode 100644 index 0000000000..e6af198fed --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -0,0 +1,49 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/Domain/AnalysisDomain.h" +#include "phasar/Utils/TypeTraits.h" + +#include + +namespace psr::monoifds { + +template +concept MonoIFDSAnalysisDomain = IsAnalysisDomain; + +/// Requirements on a problem type: flow functions over instructions (n_t) and +/// data-flow facts (d_t), a zero value, and initial seeds per function (f_t). +template +concept MonoIFDSProblem = + requires(T &Problem, + DataFlowEnvironment &InOut, + typename T::ProblemAnalysisDomain::n_t Inst, + const typename T::ProblemAnalysisDomain::d_t &Fact, + const typename T::ProblemAnalysisDomain::f_t &Fun) { + typename T::ProblemAnalysisDomain; + requires MonoIFDSAnalysisDomain; + + Problem.normalFlow(InOut, Inst); + Problem.callToRetFlow(InOut, Inst); + { + Problem.returnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; + + { + Problem.invReturnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; + + { + Problem.getZeroValue() + } -> std::convertible_to; + + Problem.initialSeeds(InOut, Fun); + }; +} // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h new file mode 100644 index 0000000000..0e772e59ff --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -0,0 +1,577 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt.
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/CFG.h" +#include "phasar/ControlFlow/ControlFlowOrder.h" +#include "phasar/DataFlow/MonoIfds/ArraySetWorkList.h" +#include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSProblem.h" +#include "phasar/DataFlow/MonoIfds/RPOWorkList.h" +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/Lazy.h" +#include "phasar/Utils/MaybeUniquePtr.h" +#include "phasar/Utils/Nullable.h" +#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/TypedVector.h" +#include "phasar/Utils/UsedGlobalsHolder.h" + +#include +#include +#include +#include + +#include + +namespace psr::monoifds { + +template class MonoIFDSSolver { +public: + using n_t = typename ProblemT::ProblemAnalysisDomain::n_t; + using d_t = typename ProblemT::ProblemAnalysisDomain::d_t; + using i_t = typename ProblemT::ProblemAnalysisDomain::i_t; + using f_t = typename ProblemT::ProblemAnalysisDomain::f_t; + using v_t = typename ProblemT::ProblemAnalysisDomain::v_t; + + explicit MonoIFDSSolver(ProblemT *Problem, const i_t *ICF, + std::pmr::polymorphic_allocator<> Alloc = + std::pmr::get_default_resource()) + : Problem(Problem), ICF(ICF), MBufRes(Alloc.resource()) {} + + MonoIFDSSolver &setConfig(MonoIfdsConfig Config) & noexcept { + this->Config = Config; + return *this; + } + + MonoIFDSSolver &setCGSCCs(const SCCHolder *SCCs) & noexcept { + this->SCCs = SCCs; + return *this; + } + + // Returns *this by reference, like the other setters, so that calls chain + // on the same solver instead of copying it. + MonoIFDSSolver & + setFunctionCompressor(const FunctionCompressor *Functions) & noexcept { + this->Functions = Functions; + return *this; + } + + MonoIFDSSolver & + setUsedGlobals(const UsedGlobalsHolder *UsedGlobals) & noexcept { + this->UsedGlobals = UsedGlobals; + return *this; + } + + void
solve(); + +private: + // NOTE: Used the node_hash_map from + // [parallel-hash-map](https://github.com/greg7mdp/parallel-hashmap) here + // for the paper-eval! + template + using node_hash_map = std::pmr::unordered_map; + + struct FunctionSummary { + Compressor SourceFactIds; + DataFlowEnvironment EndSummary; + + [[clang::require_explicit_initialization]] node_hash_map< + std::pair, SourceFactSet> LeakIf; + }; + + struct IntermediateState { + node_hash_map> PathEdges; + node_hash_map> Incoming; + + llvm::SmallDenseSet HasNewLeaks; + llvm::SmallDenseSet HasNewSummary; + + std::reference_wrapper< + const llvm::SmallDenseSet> + PermittedGlobals; + SCCId CurrSCC; + bool InRecursion; + + IntermediateState(std::pmr::memory_resource *MRes, + const UsedGlobalsHolder &UsedGlobals, + SCCId CurrSCC, bool InRecursion) + : PathEdges(MRes), Incoming(MRes), + PermittedGlobals(std::cref(UsedGlobals.GlobsPerSCC[CurrSCC])), + CurrSCC(CurrSCC), InRecursion(InRecursion) {} + }; + + struct Mapper { + TypedVector Mapping; + BitSet ComputedMappings; + + explicit Mapper(size_t NumCalleeSrcFacts) { + Mapping.resize(NumCalleeSrcFacts); + ComputedMappings.reserve(NumCalleeSrcFacts); + } + + void reset() { + for (auto &SrcFacts : Mapping) { + SrcFacts.clear(); + } + ComputedMappings.clear(); + } + + const SourceFactSet &getSourceFactsFor(auto &Solver, + const DataFlowEnvironment &In, + const FunctionSummary &CalleeSum, + SourceFactId CalleeSrc, + ByConstRef CallInst) { + auto &Ret = Mapping[CalleeSrc]; + + if (ComputedMappings.tryInsert(CalleeSrc)) { + auto &&CSFacts = Solver.Problem->invReturnFlow( + CallInst, CalleeSum.SourceFactIds[CalleeSrc]); + + for (const auto *Fact : CSFacts) { + if (const auto *FactSrc = getOrNull(In, Fact)) { + Ret.insertAllOf(*FactSrc); + } + } + } + + return Ret; + } + + void insertAllSrcFactsFor(SourceFactSet &Into, auto &Solver, + const DataFlowEnvironment &In, + const FunctionSummary &CalleeSum, + const SourceFactSet &CalleeSrcs, + ByConstRef CallInst) { + 
CalleeSrcs.foreach ([&](auto SrcFactId) { + Into.insertAllOf( + getSourceFactsFor(Solver, In, CalleeSum, SrcFactId, CallInst)); + }); + } + + [[nodiscard]] SourceFactSet + getAllSrcFactsFor(auto &Solver, const DataFlowEnvironment &In, + const FunctionSummary &CalleeSum, + const SourceFactSet &CalleeSrcs, + ByConstRef CallInst) { + SourceFactSet Ret; + insertAllSrcFactsFor(Ret, Solver, In, CalleeSum, CalleeSrcs, CallInst); + return Ret; + } + }; + + void computeFixpointForSCC(SCCId CurrSCC, + llvm::ArrayRef CurrFuns) { + const size_t SCCSize = CurrFuns.size(); + const bool InRecursion = SCCSize > 1; + IntermediateState IState(&PoolRes, *UsedGlobals, CurrSCC, InRecursion); + + const auto IterStrategy = Config.IterStrategy; + const bool UseTopoFixpointDriver = [=] { + if (IterStrategy == IterationStrategy::DedupFIFOQueue) { + return false; + } + + if (IterStrategy == IterationStrategy::HybridCapped) { + // return SCCSize < 20; + return SCCSize == 1; + } + + return true; + }(); + + ControlFlowOrder CFO; + if (UseTopoFixpointDriver) { + // TODO: implement computeCFGOrder() + // computeCFGOrder() takes SCCs and Functions by reference + computeCFGOrder(CFO, *SCCs, CurrSCC, *ICF, *Functions); + } + + ArraySetDriver DefaultDriver; + // Hand the computed order to the driver; its worklist is sized from it. + TopoFixpointDriver TopoDriver(std::move(CFO)); + + const auto ComputeFixpointWithDriver = + [&](auto &Driver) LLVM_ATTRIBUTE_NOINLINE { + for (auto FunId : llvm::reverse(CurrFuns)) { + const auto *Fun = (*Functions)[FunId]; + submitInitialSeeds(IState, Driver, Summaries[FunId].SourceFactIds, + Fun); + } + Driver.run([&](n_t BlockStart) { + analyzeBlock(IState, Driver, BlockStart); + }); + assert(Driver.empty()); + + llvm::errs() << '.'; + }; + + const auto RepropagateInRecursion = [&](auto &Driver) { + rescheduleCalls(IState, Driver); + while (!Driver.empty()) { + Driver.run( + [&](n_t BlockStart) { analyzeBlock(IState, Driver, BlockStart); }); + assert(Driver.empty()); + + rescheduleCalls(IState, Driver); + llvm::errs() << '.'; + } + +
ITST_ASSERT(IState.HasNewSummary.empty(), + "After repropagating, we should not have any summary " + "applications pending"); + }; + if (UseTopoFixpointDriver) { + ComputeFixpointWithDriver(TopoDriver); + } else { + ComputeFixpointWithDriver(DefaultDriver); + } + + if (!Config.EagerReturnPropagation) { + if (IterStrategy == IterationStrategy::TopoPrioQueue) { + RepropagateInRecursion(TopoDriver); + } else { + RepropagateInRecursion(DefaultDriver); + } + } + + repropagateLeaks(IState, CurrSCC); + } + + void submitInitialSeeds(IntermediateState &IState, auto &Driver, + Compressor &SeedCompressor, + ByConstRef Fun) { + const auto &SPs = ICF->getStartPointsOf(Fun); + + const auto &Zero = Problem->getZeroValue(); + SeedCompressor.insert(Zero); + assert(SeedCompressor.get(Zero) == SourceFactId(0) && + "The Zero value must always have Id 0!"); + + for (const auto &SP : SPs) { + auto &SeedState = IState.PathEdges[SP]; + SeedState[Zero].insert(SourceFactId(0)); + + Problem->initialSeeds(SeedState, Fun); + Driver.push(SP); + } + } + + // Re-enqueues the recorded incoming call sites of every function that got + // a new summary; does nothing outside of recursion. + void rescheduleCalls(IntermediateState &IState, auto &Driver) { + if (!IState.InRecursion) { + return; + } + + const bool EnableEnvVersioning = Config.EnableEnvVersioning; + + for (auto FunId : IState.HasNewSummary) { + IState.HasNewLeaks.erase(FunId); + const auto &Fun = (*Functions)[FunId]; + + for (const auto &CS : getOrDefault(IState.Incoming, Fun)) { + const auto &CSFun = ICF->getFunctionOf(CS); + if (auto CallerId = Functions->getOrNull(CSFun)) { + Driver.push(CS); + if (EnableEnvVersioning) { + IState.PathEdges[CS].Version++; + } + } + } + } + IState.HasNewSummary.clear(); + } + + void repropagateLeaks(IntermediateState &IState, SCCId CurrSCC) { + llvm::SmallDenseSet NewLeaksWL; + while (!IState.HasNewLeaks.empty()) { + NewLeaksWL.swap(IState.HasNewLeaks); + + for (auto FunId : NewLeaksWL) { + handleLeaksForFun(IState, CurrSCC, FunId); + } + NewLeaksWL.clear(); + } + } + + void
handleLeaksForFun(IntermediateState &IState, SCCId CurrSCC, + FunctionId CurrFun) { + const auto *Fun = (*Functions)[CurrFun]; + const auto &Sum = Summaries[CurrFun]; + + Mapper M(Sum.SourceFactIds.size()); + + for (const auto &CS : ICF->getCallersOf(Fun)) { + auto CallerId = Functions->getOrNull(CS->getFunction()); + if (!CallerId) { + continue; + } + + auto CallerSCC = SCCs->SCCOfNode[*CallerId]; + if (CallerSCC != CurrSCC) { + continue; + } + + M.reset(); + + const auto &In = getOrDefault(IState.PathEdges, CS); + + for (const auto &[CalleeLeak, LeakSrc] : Sum.LeakIf) { + const auto &CSSrc = M.getAllSrcFactsFor(*this, In, Sum, LeakSrc, CS); + reportOrPropagateLeak(IState, *CallerId, CalleeLeak.first, + CalleeLeak.second, CSSrc); + } + } + } + + void analyzeBlock(IntermediateState &IState, auto &Driver, + ByConstRef BlockStart) { + + auto &LocalStateRef = IState.PathEdges[BlockStart]; + if (Config.EnableEnvVersioning && + LocalStateRef.AnalyzedVersion >= LocalStateRef.Version) { + // Nothing to be done here + return; + } + + LocalStateRef.AnalyzedVersion = LocalStateRef.Version; + analyzeBlockImpl(IState, Driver, BlockStart, LocalStateRef); + } + + void analyzeBlockImpl(IntermediateState &IState, auto &Driver, + ByConstRef BlockStart, + DataFlowEnvironment LocalState) { + + // Functions is pointer-like: look the id up on the pointee + auto CurrFunId = Functions->get(BlockStart->getFunction()); + + // const bool EnableAggressiveLoopPriorization = + // Config.EnableAggressiveLoopPriorization; + + Nullable CurrInst = BlockStart; + + do { + auto Last = CurrInst; + + do { + analyzeInstruction(IState, LocalState, CurrFunId, + unwrapNullable(CurrInst)); + Last = CurrInst; + if constexpr (IsBlockAwareControlFlow) { + CurrInst = ICF->getUniqueSuccessor(unwrapNullable(CurrInst)); + } else { + const auto &Succs = ICF->getSuccsOf(unwrapNullable(CurrInst)); + if (Succs.size() == 1) { + CurrInst = Succs[0]; + } else { + CurrInst = {}; + } + } + } while (CurrInst); + + Nullable UniqueSucc{}; + + // We have at least one instruction, so we
can safely unwrap here + const auto &Succs = ICF->getSuccsOf(unwrapNullable(Last)); + const auto SuccSz = Succs.size(); + const bool HasSingleSucc = SuccSz == 1; + for (const auto &Succ : Succs) { + bool HasSinglePred = [&]() { + if constexpr (IsBlockAwareControlFlow) { + return ICF->hasUniquePredecessor(Succ); + } + return false; + }(); + + auto [SuccBBStateIt, Inserted] = IState.PathEdges.try_emplace( + Succ, lazy{[&] { + if (HasSingleSucc && !(HasSinglePred && !UniqueSucc)) { + return std::move(LocalState); + } + + return LocalState; + }}); + + if (HasSinglePred) { + // Assign + + if (Inserted || SuccBBStateIt->second != LocalState) { + if (!UniqueSucc) { + UniqueSucc = Succ; + if (!Inserted) { + // Note: Cannot move LocalState here, as we still + // need it in the next iteration + SuccBBStateIt->second = LocalState; + } + + } else { + Driver.push(Succ); + if (!Inserted) { + if (HasSingleSucc) { + SuccBBStateIt->second = std::move(LocalState); + } else { + SuccBBStateIt->second = LocalState; + } + } + } + + SuccBBStateIt->second.Version++; + } + continue; + } + + // Merge + if (Inserted || tryMergeStates(SuccBBStateIt->second, LocalState)) { + SuccBBStateIt->second.Version++; + + // note: HasSingleSucc implies here that UniqueSucc==nullptr + + // TODO: Should we support EnableAggressiveLoopPriorization outside of + // LLVM? 
It did not show significant performance benefits, though + + // if (EnableAggressiveLoopPriorization && HasSingleSucc && + // Block->getTerminator()->hasMetadata(llvm::LLVMContext::MD_loop)) + // { + // UniqueSucc = Succ; + // } else { + Driver.push(Succ); + // } + } + } + + if (SuccSz == 0 && Config.EagerReturnPropagation && + ICF->isExitInst(Last)) { + if (IState.HasNewSummary.erase(CurrFunId)) { + rescheduleCallsAtExit(IState, Driver, CurrFunId); + } + } + + CurrInst = UniqueSucc; + } while (CurrInst); + } + + void analyzeInstruction(IntermediateState &IState, + DataFlowEnvironment &LocalState, + FunctionId CurrFunId, ByConstRef Inst) { + + if (ICF->isCallSite(Inst)) { + return analyzeCallInst(IState, LocalState, CurrFunId, Inst); + } + + handleSourceSinkConfig(IState, LocalState, CurrFunId, Inst); + + if (ICF->isExitInst(Inst)) { + return analyzeExitInst(IState, LocalState, CurrFunId, Inst); + } + + Problem->normalFlow(LocalState, Inst); + } + + void analyzeExitInst(IntermediateState &IState, + DataFlowEnvironment &LocalState, + FunctionId CurrFunId, ByConstRef Inst) { + const bool InRecursion = IState.InRecursion; + bool Changed = false; + + auto &Sum = Summaries[CurrFunId].EndSummary; + + for (auto &&[ExitFact, ExitSrc] : LocalState) { + if constexpr (requires(ProblemT &P) { + { + P.shouldBeInSummary(ExitFact, Inst) + } -> std::convertible_to; + }) { + if (!Problem->shouldBeInSummary(ExitFact, Inst)) { + continue; + } + } + + auto [It, Inserted] = Sum.try_emplace(ExitFact, std::move(ExitSrc)); + if (InRecursion) { + Changed |= Inserted || It->second.tryMergeWith(std::move(ExitSrc)); + } else if (!Inserted) { + It->second.insertAllOf(std::move(ExitSrc)); + } + } + + if (Changed /* && InRecursion*/) { + IState.HasNewSummary.insert(CurrFunId); + } + } + + void analyzeCallInst(IntermediateState &IState, + DataFlowEnvironment &LocalState, + FunctionId CurrFunId, ByConstRef Inst) { + + const auto &Callees = ICF->getCalleesOfCallAt(Inst); + + const auto CSInfo = 
handleCallSrcSinksAndMayRecurse( + IState, LocalState, Callees, CurrFunId, Inst); + + if (CSInfo.MayRecurse) { + IState.InRecursion = true; + IState.PathEdges[Inst] = LocalState; + } + + DataFlowEnvironment CollectedSummary; + + for (const auto &CalleeFun : Callees) { + // Collect all data-flows that need to be propagated. Don't update + // LocalState in-place + + // Functions is pointer-like: look the id up on the pointee + auto CalleeId = Functions->get(CalleeFun); + applySummary(IState, std::as_const(LocalState), CollectedSummary, + CalleeFun, CalleeId, Inst, CurrFunId); + } + if (CSInfo.CanCTR) { + Problem->callToRetFlow(LocalState, Inst); + } + + mergeStates(LocalState, std::move(CollectedSummary)); + } + + // TODO: applySummary + // TODO: handleCallSrcSinksAndMayRecurse + // TODO: tryMergeStates, mergeStates + // TODO: rescheduleCallsAtExit + // TODO: reportOrPropagateLeak + + // TODO: Add srcsink-config to MonoIFDSProblem + + // -- data members + + ProblemT *Problem{}; + const i_t *ICF{}; + + MonoIfdsConfig Config{}; + + std::pmr::monotonic_buffer_resource MBufRes; + // XXX: Make this synchronized when parallelizing!
+ std::pmr::unsynchronized_pool_resource PoolRes{&MBufRes}; + + MaybeUniquePtr> SCCs{}; + MaybeUniquePtr Functions{}; + MaybeUniquePtr> UsedGlobals{}; + + // --- global analysis state + TypedVector Summaries{}; + llvm::SmallDenseMap> Leaks{}; +}; + +template void MonoIFDSSolver::solve() { + // Step 1: Check for pre-analysis results: If any of them is null, create them + + // Step 2: Pre-allocate buffers + Summaries.resize(Functions->size()); + + // Step 3: Analyze each CG-SCC in isolation + + for (const auto &[SCC, CurrFuns] : SCCs->NodesInSCC.enumerate()) { + computeFixpointForSCC(SCC, CurrFuns); + } +} + +} // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h new file mode 100644 index 0000000000..32ddc84d32 --- /dev/null +++ b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h @@ -0,0 +1,59 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/ControlFlowOrder.h" + +namespace psr::monoifds { +/// See SootUp's +/// [UniversePriorityQueue](https://github.com/soot-oss/SootUp/blob/develop/sootup.analysis.intraprocedural/src/main/java/sootup/analysis/intraprocedural/UniverseSortedPriorityQueue.java) +template class TopoFixpointDriver { +public: + TopoFixpointDriver() noexcept = default; + + TopoFixpointDriver(ControlFlowOrder &&CFO) : CFO(std::move(CFO)) { + WorkList.resize(this->CFO.Order.size()); + } + + void push(ItemT Item) { + auto IId = CFO.Order.get(Item); + WorkList.set(uint32_t(IId)); + + if (int(IId) > Max) { + Max = int(IId); + } + } + + std::optional pop() { + if (Max < 0) { + return std::nullopt; + } + + auto IId = ControlFlowOrder::CFGOrderId(Max); + // Clear the popped bit before searching downwards; a bit left set would + // be found again by find_prev() after a later push() of a higher id. + Max = WorkList.reset(unsigned(Max)).find_prev(unsigned(Max)); + return CFO.Order[IId]; + } + + [[nodiscard]] bool empty() const noexcept { return Max < 0; } + + LLVM_ATTRIBUTE_ALWAYS_INLINE void run(std::invocable auto Handler) { + while (auto Inst = pop()) { + std::invoke(Handler, *Inst); + } + } + + [[nodiscard]] constexpr const auto &getCFO() const noexcept { return CFO; } + +private: + ControlFlowOrder CFO; + llvm::BitVector WorkList; + int Max = -1; +}; +} // namespace psr::monoifds diff --git a/include/phasar/Utils/ArraySet.h b/include/phasar/Utils/ArraySet.h new file mode 100644 index 0000000000..80013385e9 --- /dev/null +++ b/include/phasar/Utils/ArraySet.h @@ -0,0 +1,100 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt.
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/Utils/ByRef.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +#include +#include +#include + +namespace psr { + +namespace array_set::detail { +template +[[nodiscard]] llvm::ArrayRef dropFront(llvm::ArrayRef AR, size_t Drop) { + if (Drop >= AR.size()) { + return {}; + } + + return AR.drop_front(Drop); +} +} // namespace array_set::detail + +template ::value> +class ArraySet { +public: + ArraySet() noexcept = default; + + bool insert(T Value) + requires(psr::CanEfficientlyPassByValue) + { + using array_set::detail::dropFront; + if (isContained(Vecs[InsertIndex], Value) || + isContained(dropFront(Vecs[1 - InsertIndex], IterIdx + 1), Value)) { + + return false; + } + + Vecs[InsertIndex].push_back(Value); + return true; + } + + // Forwarding overload for element types that are not cheap to pass by + // value; the by-value overload above covers the cheap ones. + bool insert(auto &&Value) + requires(!psr::CanEfficientlyPassByValue) + { + using array_set::detail::dropFront; + if (isContained(Vecs[InsertIndex], Value) || + isContained(dropFront(Vecs[1 - InsertIndex], IterIdx + 1), Value)) { + return false; + } + + Vecs[InsertIndex].push_back(PSR_FWD(Value)); + return true; + } + + [[nodiscard]] bool empty() const noexcept { + return Vecs[0].empty() && Vecs[1].empty(); + } + + template void foreach (BodyT Body) { + assert(IterIdx == 0); + do { + auto QIndex = 1 - InsertIndex; + auto &Q = Vecs[QIndex]; + for (; IterIdx != Q.size(); ++IterIdx) { + std::invoke(Body, std::move(Q[IterIdx])); + } + Vecs[QIndex].clear(); + IterIdx = 0; + InsertIndex = QIndex; + } while (!Vecs[1 - InsertIndex].empty()); + } + + [[nodiscard]] size_t getMemorySize() const noexcept { + return Vecs[0].capacity_in_bytes() + Vecs[1].capacity_in_bytes(); + } + +private: + static bool isContained(llvm::ArrayRef Data, ByConstRef Val) noexcept { + auto It = std::find(std::execution::unseq, Data.begin(), Data.end(), Val); + return It != Data.end(); + } + + std::array,
2> Vecs; + size_t InsertIndex = 0; + size_t IterIdx = 0; +}; +} // namespace psr diff --git a/include/phasar/Utils/FunctionCompressor.h b/include/phasar/Utils/FunctionCompressor.h new file mode 100644 index 0000000000..b7b6bab230 --- /dev/null +++ b/include/phasar/Utils/FunctionCompressor.h @@ -0,0 +1,25 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/StrongTypeDef.h" + +#include "llvm/IR/Function.h" + +#include + +PHASAR_STRONG_TYPEDEF(psr, uint32_t, FunctionId); + +namespace psr { +using FunctionCompressor = Compressor; + +std::string to_string(FunctionId FId); +} // namespace psr diff --git a/include/phasar/Utils/Lazy.h b/include/phasar/Utils/Lazy.h new file mode 100644 index 0000000000..519003ef43 --- /dev/null +++ b/include/phasar/Utils/Lazy.h @@ -0,0 +1,38 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include +#include +#include + +namespace psr { +// NOLINTNEXTLINE(readability-identifier-naming) +template struct lazy { + Fn F; + + template + requires(!std::is_same_v, lazy>) + lazy(FF &&F) noexcept(std::is_nothrow_constructible_v) + : F(std::forward(F)) {} + + constexpr operator std::invoke_result_t() && noexcept( + std::is_nothrow_invocable_v) { + return std::invoke(std::move(F)); + } +}; + +template lazy(FF) -> lazy>; + +#define PSR_LAZY(...) \ + ::psr::lazy { \ + [&] { return __VA_ARGS__; } \ + } +} // namespace psr diff --git a/include/phasar/Utils/StrongTypeDef.h b/include/phasar/Utils/StrongTypeDef.h new file mode 100644 index 0000000000..072f3f6832 --- /dev/null +++ b/include/phasar/Utils/StrongTypeDef.h @@ -0,0 +1,96 @@ +#pragma once + +#include "phasar/Utils/ByRef.h" + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/HashBuilder.h" + +#include +#include +#include + +#define PHASAR_STRONG_TYPEDEF(NAMESPACE, TYPE, NAME, ...) \ + namespace NAMESPACE { \ + enum class [[clang::enum_extensibility(open)]] NAME : TYPE { __VA_ARGS__ }; \ + } \ + namespace llvm { \ + template <> struct DenseMapInfo { \ + using NAME = NAMESPACE::NAME; \ + static constexpr NAME getEmptyKey() noexcept { \ + return NAME(std::is_signed_v ? std::numeric_limits::min() \ + : std::numeric_limits::max()); \ + } \ + static constexpr NAME getTombstoneKey() noexcept { \ + return NAME(std::is_signed_v \ + ? 
std::numeric_limits::min() + 1 \ + : std::numeric_limits::max() - 1); \ + } \ + static auto getHashValue(NAME Id) noexcept { \ + return llvm::hash_value(TYPE(Id)); \ + } \ + static constexpr bool isEqual(NAME Id1, NAME Id2) noexcept { \ + return Id1 == Id2; \ + } \ + }; \ + } \ + namespace std { \ + template <> struct hash { \ + auto operator()(::psr::ByConstRef Id) const noexcept { \ + return llvm::hash_value(TYPE(Id)); \ + } \ + }; \ + } + +#define PHASAR_DERIVE_ENUM_DMI(QUAL_NAME, TYPE, TYPE_PARAM) \ + namespace llvm { \ + template struct DenseMapInfo { \ + static constexpr QUAL_NAME getEmptyKey() noexcept { \ + return QUAL_NAME(std::is_signed_v \ + ? std::numeric_limits::min() \ + : std::numeric_limits::max()); \ + } \ + static constexpr QUAL_NAME getTombstoneKey() noexcept { \ + return QUAL_NAME(std::is_signed_v \ + ? std::numeric_limits::min() + 1 \ + : std::numeric_limits::max() - 1); \ + } \ + static auto getHashValue(QUAL_NAME Id) noexcept { \ + return llvm::hash_value(TYPE(Id)); \ + } \ + static constexpr bool isEqual(QUAL_NAME Id1, QUAL_NAME Id2) noexcept { \ + return Id1 == Id2; \ + } \ + }; \ + } \ + namespace std { \ + template <> struct hash { \ + auto operator()(::psr::ByConstRef Id) const noexcept { \ + return llvm::hash_value(TYPE(Id)); \ + } \ + }; \ + } + +#define PHASAR_DERIVE_DMI(QUAL_TYPE) \ + namespace llvm { \ + template <> struct DenseMapInfo<::QUAL_TYPE> { \ + using Type = ::QUAL_TYPE; \ + static Type getEmptyKey() noexcept { return Type::getEmptyKey(); } \ + static Type getTombstoneKey() noexcept { return Type::getTombstoneKey(); } \ + static bool isEqual(::psr::ByConstRef Lhs, \ + ::psr::ByConstRef Rhs) noexcept { \ + return Lhs == Rhs; \ + } \ + static auto getHashValue(::psr::ByConstRef Val) noexcept { \ + using llvm::hash_value; \ + return hash_value(Val); \ + } \ + }; \ + } \ + namespace std { \ + template <> struct hash { \ + auto operator()(::psr::ByConstRef<::QUAL_NAME> Id) const noexcept { \ + using llvm::hash_value; \ + return 
hash_value(TYPE(Id)); \ + } \ + }; \ + } diff --git a/include/phasar/Utils/UsedGlobalsHolder.h b/include/phasar/Utils/UsedGlobalsHolder.h new file mode 100644 index 0000000000..bcb297267c --- /dev/null +++ b/include/phasar/Utils/UsedGlobalsHolder.h @@ -0,0 +1,27 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/ADT/DenseSet.h" + +namespace psr { + +enum class FunctionId : uint32_t; + +template struct UsedGlobalsHolder { + using GlobalSet = llvm::SmallDenseSet; + + TypedVector, GlobalSet> GlobsPerSCC; + TypedVector, GlobalSet> InitialGlobsPerSCC; +}; + +} // namespace psr diff --git a/lib/ControlFlow/ControlFlowOrder.cpp b/lib/ControlFlow/ControlFlowOrder.cpp new file mode 100644 index 0000000000..72fc5b1ae9 --- /dev/null +++ b/lib/ControlFlow/ControlFlowOrder.cpp @@ -0,0 +1,182 @@ +#include "phasar/ControlFlow/ControlFlowOrder.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/ArraySet.h" +#include "phasar/Utils/FunctionCompressor.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" + +using namespace psr; + +void psr::computeCFGOrder(ControlFlowOrder &Into, const llvm::Function *Fun) { + + llvm::SmallDenseSet Seen; + + const auto Visit = [&, CFG = psr::LLVMBasedCFG()]( + auto &Visit, const llvm::Instruction *Inst) { + if (!Seen.insert(Inst).second) { + return; + } + + const auto *Next = 
Inst->getNextNonDebugInstruction(); + if (Next) { + Visit(Visit, Next); + } else { + for (const auto *Succ : CFG.getSuccsOf(Inst)) { + Visit(Visit, Succ); + } + } + + Into.Order.insert(Inst); + }; + + if (!Fun->isDeclaration()) { + Visit(Visit, &Fun->getEntryBlock().front()); + } +} + +constexpr static auto psrGetExitPoints(const auto *Fun) { + if constexpr (requires() { psr::getAllExitPoints(Fun, true); }) { + return psr::getAllExitPoints(Fun, /*IncludeResume=*/true); + } else { + return psr::getAllExitPoints(Fun); + } +} + +static llvm::SmallDenseMap +computeDistanceToRet(llvm::ArrayRef Funs, + const SCCHolder &SCCs, SCCId SCC, + const FunctionCompressor &Functions, + const psr::LLVMBasedCallGraph &CG) { + llvm::SmallDenseMap Ret; + + ArraySet WL; + + for (auto FunId : Funs) { + const auto *Fun = Functions[FunId]; + for (const auto *ExitInst : psrGetExitPoints(Fun)) { + WL.insert(ExitInst); + Ret[ExitInst] = 0; + } + + WL.foreach ([&, CFG = psr::LLVMBasedCFG()](const auto *Inst) { + const auto Dist = Ret[Inst]; + const auto NextDist = [&] { + size_t NextDist = Dist + 1; + if (llvm::isa(Inst)) { + for (const auto *Callee : CG.getCalleesOfCallAt(Inst)) { + const auto CalleeId = Functions.getOrNull(Callee); + if (CalleeId && SCCs.SCCOfNode[*CalleeId] == SCC) { + // some heuristics... 
+ NextDist += 100; + } + } + } + return NextDist; + }(); + + for (const auto *Pred : CFG.getPredsOf(Inst)) { + auto [It, Inserted] = Ret.try_emplace(Pred, NextDist); + if (!Inserted && It->second > NextDist) { + It->second = NextDist; + Inserted = true; + } + + if (Inserted) { + WL.insert(Pred); + } + } + }); + } + + return Ret; +} + +void psr::computeCFGOrder(ControlFlowOrder &Into, + const SCCHolder &SCCs, + SCCId SCC, + const psr::LLVMBasedCallGraph &CG, + const FunctionCompressor &Functions) { + llvm::SmallDenseSet Seen; + + const auto &Funs = SCCs.NodesInSCC[SCC]; + + const auto DistToRet = computeDistanceToRet(Funs, SCCs, SCC, Functions, CG); + + const auto Visit = [&, CFG = psr::LLVMBasedCFG()]( + auto &Visit, const llvm::Instruction *Inst) { + if (!Seen.insert(Inst).second) { + return; + } + + if (llvm::isa(Inst)) { + for (const auto *CS : CG.getCallersOf(Inst->getFunction())) { + const auto *Caller = CS->getFunction(); + const auto CallerId = Functions.getOrNull(Caller); + if (CallerId && SCCs.SCCOfNode[*CallerId] == SCC) { + Visit(Visit, CS); + } + } + } + + const auto *Next = Inst->getNextNonDebugInstruction(); + if (Next) { + Visit(Visit, Next); + } else { + auto Succs = CFG.getSuccsOf(Inst); + llvm::sort(Succs, [&](const auto *I1, const auto *I2) { + return DistToRet.lookup(I1) < DistToRet.lookup(I2); + }); + for (const auto *Succ : Succs) { + Visit(Visit, Succ); + } + } + + Into.Order.insert(Inst); + }; + + assert(!Funs.empty()); + + const llvm::Function *LargestFun = nullptr; + size_t LargestSz = 0; + for (auto FunId : Funs) { + const auto *Fun = Functions[FunId]; + if (Fun->isDeclaration()) { + continue; + } + + const auto Callers = CG.getCallersOf(Fun); + const auto *OutsideCaller = llvm::find_if(Callers, [&](const auto *Caller) { + const auto CallerId = Functions.getOrNull(Caller->getFunction()); + return CallerId && SCCs.SCCOfNode[*CallerId] != SCC; + }); + if (OutsideCaller == Callers.end()) { + continue; + } + + const auto FunSz = Fun->size(); 
+ if (FunSz > LargestSz) { + LargestFun = Fun; + LargestSz = FunSz; + } + } + + assert(!LargestFun || !LargestFun->isDeclaration()); + if (LargestFun) { + Visit(Visit, &LargestFun->getEntryBlock().front()); + } + + for (auto FunId : Funs) { + const auto *Fun = Functions[FunId]; + if (!Fun->isDeclaration()) { + Visit(Visit, &Fun->getEntryBlock().front()); + } + } +} diff --git a/lib/Utils/FunctionCompressor.cpp b/lib/Utils/FunctionCompressor.cpp new file mode 100644 index 0000000000..b7c7d4719d --- /dev/null +++ b/lib/Utils/FunctionCompressor.cpp @@ -0,0 +1,7 @@ +#include "phasar/Utils/FunctionCompressor.h" + +#include + +std::string psr::to_string(FunctionId FId) { + return "@" + std::to_string(size_t(FId)); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt index b5de60ba49..2f60ae9126 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt @@ -8,6 +8,7 @@ set(IfdsIdeSources SparseIDESolverTest.cpp IterativeIDESolverTest.cpp CFLFieldSensTest.cpp + MonoIFDSSolverTest.cpp ) foreach(TEST_SRC ${IfdsIdeSources}) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp new file mode 100644 index 0000000000..dedd2e2e43 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp @@ -0,0 +1 @@ +#include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" From 93d7054638705bca1b32db21226d9559506f11d5 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 27 Mar 2026 16:53:28 +0100 Subject: [PATCH 02/29] Port rest of monoifds-solver --- .../DataFlow/MonoIfds/MonoIFDSProblem.h | 57 +++--- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 168 ++++++++++++++++-- 2 files changed, 190 insertions(+), 35 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 
e6af198fed..9cd1ebc80f 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -21,29 +21,36 @@ template concept MonoIFDSAnalysisDomain = IsAnalysisDomain; template -concept MonoIFDSProblem = - requires(T &Problem, - DataFlowEnvironment &InOut, - typename T::ProblemAnalysisDomain::n_t Inst, - const typename T::ProblemAnalysisDomain::n_t &Fact, - const typename T::ProblemAnalysisDomain::f_t &Fun) { - typename T::ProblemAnalysisDomain; - requires MonoIFDSAnalysisDomain; - - Problem.normalFlow(InOut, Inst); - Problem.callToRetFlow(InOut, Inst); - { - Problem.returnFlow(Inst, Fact) - } -> psr::is_iterable_over_v; - - { - Problem.invReturnFlow(Inst, Fact) - } -> psr::is_iterable_over_v; - - { - Problem.getZeroValue() - } -> std::convertible_to; - - Problem.initialSeeds(InOut, Fun); - }; +concept MonoIFDSProblem = requires( + T &Problem, + DataFlowEnvironment &InOut, + typename T::ProblemAnalysisDomain::n_t Inst, + const typename T::ProblemAnalysisDomain::n_t &Fact, + const typename T::ProblemAnalysisDomain::f_t &Fun) { + typename T::ProblemAnalysisDomain; + requires MonoIFDSAnalysisDomain; + + Problem.normalFlow(InOut, Inst); + Problem.callToRetFlow(InOut, Inst); + { + Problem.returnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; + + { + Problem.invReturnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; + + { + Problem.getZeroValue() + } -> std::convertible_to; + + Problem.initialSeeds(InOut, Fun); + + Problem.generateTaintsAtCall( + Inst, Fun, [](const typename T::ProblemAnalysisDomain::d_t & GenFact) {}); + Problem.leakTaintsAtCall( + Inst, Fun, + [](const typename T::ProblemAnalysisDomain::d_t & LeakFact) {}); + Problem.onResult(Inst, Fact); +}; } // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 0e772e59ff..499d49b103 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ 
b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -58,8 +58,8 @@ template class MonoIFDSSolver { return *this; } - MonoIFDSSolver - setFunctionCompressor(const FunctionCompressor *Functions) & noexcept { + MonoIFDSSolver setFunctionCompressor( + const Compressor *Functions) & noexcept { this->Functions = Functions; return *this; } @@ -524,7 +524,7 @@ template class MonoIFDSSolver { auto CalleeId = Functions.get(CalleeFun); applySummary(IState, std::as_const(LocalState), CollectedSummary, - CalleeFun, CalleeId, Inst, CurrFunId); + CalleeId, Inst, CurrFunId); } if (CSInfo.CanCTR) { Problem->callToRetFlow(LocalState, Inst); @@ -533,13 +533,161 @@ template class MonoIFDSSolver { mergeStates(LocalState, std::move(CollectedSummary)); } - // TODO: applySummary - // TODO: handleCallSrcSinksAndMayRecurse - // TODO: tryMergeStates, mergeStates - // TODO: rescheduleCallsAtExit - // TODO: reportOrPropagateLeak + void applySummary(IntermediateState &IState, + const DataFlowEnvironment &In, + DataFlowEnvironment &LocalState, FunctionId CalleeId, + n_t Inst, FunctionId CurrFunId) { + const auto &Sum = Summaries[CalleeId]; + Mapper M(Sum.SourceFactIds.size()); + + for (const auto &[SumFact, SumSrc] : Sum.EndSummary) { + auto &&RetFacts = Problem->returnFlow(Inst, SumFact); + if (RetFacts.empty()) { + continue; + } + + const auto &RetSrcFacts = + M.getAllSrcFactsFor(*this, In, Sum, SumSrc, Inst); + if (RetSrcFacts.empty()) { + continue; + } + + for (const auto *RetFact : RetFacts) { + LocalState[RetFact].insertAllOf(RetSrcFacts); + } + } + + if (CalleeId != CurrFunId) { // Prevent self-insertion + for (const auto &[CalleeLeak, LeakSrc] : Sum.LeakIf) { + const auto &CSSrc = M.getAllSrcFactsFor(*this, In, Sum, LeakSrc, Inst); + reportOrPropagateLeak(IState, CurrFunId, CalleeLeak.first, + CalleeLeak.second, CSSrc); + } + } + } + + struct CallSiteInfo { + bool MayRecurse = false; + bool CanCTR = false; + }; + + [[nodiscard]] CallSiteInfo handleCallSrcSinksAndMayRecurse( + 
IntermediateState &IState, DataFlowEnvironment &LocalState, + const auto &Callees, FunctionId CurrFunId, ByConstRef Inst) { + + const auto &SCCs = *this->SCCs; + const auto CurrSCC = IState.CurrSCC; + + bool MayRecurse = false; + bool CanCTR = !Callees.empty(); + for (f_t CalleeFun : Callees) { + if (ICF->getStartPointsOf(CalleeFun).empty()) { + CanCTR = false; + } + + auto CalleeId = Functions.get(CalleeFun); + auto CalleeSCC = SCCs.SCCOfNode[CalleeId]; + if (CalleeSCC == CurrSCC) { + MayRecurse = true; + IState.Incoming[CalleeFun].insert(Inst); + } + + Problem->leakTaintsAtCall(Inst, CalleeFun, [&](ByConstRef LeakFact) { + if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { + reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); + } + }); + + // Generate taints from zero: + Problem->generateTaintsAtCall( + Inst, CalleeFun, [&](ByConstRef GenFact) { + // Note: Assume, this gets called for all relevant aliases as well + LocalState[GenFact].insert(SourceFactId(0)); + }); + } + + return { + .MayRecurse = MayRecurse, + .CanCTR = CanCTR, + }; + } - // TODO: Add srcsink-config to MonoIFDSProblem + void rescheduleCallsAtExit(IntermediateState &IState, auto &Driver, + FunctionId CurrFunId) { + const auto &Fun = (*Functions)[CurrFunId]; + const auto EnableEnvVersioning = Config.EnableEnvVersioning; + + for (const auto &CS : getOrDefault(IState.Incoming, Fun)) { + if (auto CallerId = Functions->getOrNull(CS->getFunction())) { + // Driver.push(CS); + if (EnableEnvVersioning) { + IState.PathEdges[CS].Version++; + } + + Driver.push(CS); + } + } + } + + void reportOrPropagateLeak(IntermediateState &IState, FunctionId CurrFunId, + n_t LeakInst, d_t LeakFact, SourceFactSet From) { + // The zero fact has always Id 0! 
+ if (From.tryErase(SourceFactId(0))) { + if (Leaks[LeakInst].insert(LeakFact).second) { + Problem->onResult(LeakInst, LeakFact); + } + } + + auto &CurrSum = Summaries[CurrFunId]; + + bool New = + CurrSum.LeakIf[{LeakInst, LeakFact}].tryMergeWith(std::move(From)); + + if (New && IState.InRecursion) { + IState.HasNewLeaks.insert(CurrFunId); + } + } + + static void mergeStates(DataFlowEnvironment &Into, + DataFlowEnvironment &&From) { + if (Into.empty()) { + if (&Into != &From) { + Into = std::move(From); + } + + return; + } + + if (Into.size() < From.size()) { + std::swap(Into, From); + } + + for (auto &[TgtFact, SrcFactIds] : From) { + auto [It, Inserted] = Into.try_emplace(TgtFact, std::move(SrcFactIds)); + if (!Inserted) { + It->second.insertAllOf(std::move(SrcFactIds)); + } + } + } + + [[nodiscard]] static bool + tryMergeStates(DataFlowEnvironment &Into, + const DataFlowEnvironment &From) { + // TODO Handle phis + + if (Into.empty()) { + Into = From; + return !From.empty(); + } + + bool Changed = false; + for (const auto &[TgtFact, SrcFactIds] : From) { + auto [It, Inserted] = Into.try_emplace(TgtFact, SrcFactIds); + Changed |= Inserted || It->second.tryMergeWith(SrcFactIds); + } + + return Changed; + } // -- data members @@ -553,7 +701,7 @@ template class MonoIFDSSolver { std::pmr::unsynchronized_pool_resource PoolRes{&MBufRes}; MaybeUniquePtr> SCCs{}; - MaybeUniquePtr Functions{}; + MaybeUniquePtr> Functions{}; MaybeUniquePtr> UsedGlobals{}; // --- global analysis state From 940b2196c1ca195327a3ab7f15971329e2739802 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 30 Mar 2026 15:48:35 +0200 Subject: [PATCH 03/29] Add rest of monoifds to phasar --- .clang-tidy | 1 + include/phasar/ControlFlow/CFG.h | 4 + include/phasar/ControlFlow/ControlFlowOrder.h | 57 +++- .../DataFlow/MonoIfds/MonoIFDSProblem.h | 29 +- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 107 ++++--- .../phasar/DataFlow/MonoIfds/RPOWorkList.h | 13 +- 
.../PhasarLLVM/DataFlow/MonoIfds/AliasCache.h | 61 ++++ .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 105 +++++++ .../PhasarLLVM/TaintConfig/TaintConfigBase.h | 13 + .../TaintConfig/TaintConfigUtilities.h | 44 +++ include/phasar/Pointer/AliasIterator.h | 9 +- include/phasar/Utils/TypeTraits.h | 7 +- lib/CMakeLists.txt | 5 + lib/ControlFlow/ControlFlowOrder.cpp | 182 ----------- lib/PhasarLLVM/DataFlow/CMakeLists.txt | 1 + lib/PhasarLLVM/DataFlow/DataFlow.cppm | 1 + .../DataFlow/MonoIfds/AliasCache.cpp | 35 +++ .../DataFlow/MonoIfds/CMakeLists.txt | 22 ++ .../Problems/MonoIFDSTaintAnalysis.cpp | 293 ++++++++++++++++++ .../DataFlow/MonoIfds/Problems/MonoIfds.cppm | 8 + .../DataFlow/IfdsIde/MonoIFDSSolverTest.cpp | 9 + 21 files changed, 741 insertions(+), 265 deletions(-) create mode 100644 include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h create mode 100644 include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h delete mode 100644 lib/ControlFlow/ControlFlowOrder.cpp create mode 100644 lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp create mode 100644 lib/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt create mode 100644 lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp create mode 100644 lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIfds.cppm diff --git a/.clang-tidy b/.clang-tidy index 3308a915db..d0cbb771d6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -19,6 +19,7 @@ Checks: '-*, -readability-identifier-length, -readability-redundant-member-init, -readability-use-anyofallof, + -readability-avoid-return-with-void-value, cppcoreguidelines-*, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, diff --git a/include/phasar/ControlFlow/CFG.h b/include/phasar/ControlFlow/CFG.h index da8f7e77bc..9fbecba675 100644 --- a/include/phasar/ControlFlow/CFG.h +++ b/include/phasar/ControlFlow/CFG.h @@ -60,6 +60,10 @@ concept CFG = requires(const T &CF, typename T::n_t Inst, typename 
T::f_t Fun) { requires InstructionClassifier; }; +template +concept CFGOf = CFG && std::same_as && + std::same_as; + template concept BidiCFG = CFG && requires(const T &CF, typename T::n_t Inst, typename T::f_t Fun) { diff --git a/include/phasar/ControlFlow/ControlFlowOrder.h b/include/phasar/ControlFlow/ControlFlowOrder.h index 74a8c19aab..560ccbf6d9 100644 --- a/include/phasar/ControlFlow/ControlFlowOrder.h +++ b/include/phasar/ControlFlow/ControlFlowOrder.h @@ -9,21 +9,22 @@ * Fabian Schiebel and others *****************************************************************************/ -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/ControlFlow/CFG.h" +#include "phasar/Utils/ByRef.h" #include "phasar/Utils/Compressor.h" -#include "phasar/Utils/FunctionCompressor.h" -#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/Nullable.h" +#include "phasar/Utils/Utilities.h" -#include "llvm/IR/Instruction.h" +#include "llvm/ADT/DenseSet.h" #include namespace psr { -struct ControlFlowOrder { +template struct ControlFlowOrder { enum class CFGOrderId : uint32_t {}; - Compressor Order; + Compressor Order; [[nodiscard]] auto begin() const noexcept { return std::make_reverse_iterator(Order.begin()); @@ -33,17 +34,39 @@ struct ControlFlowOrder { } }; -// TODO: Make ehtis independent from LLVM IR -void computeCFGOrder(ControlFlowOrder &Into, const llvm::Function *Fun); -void computeCFGOrder( - ControlFlowOrder &Into, const SCCHolder &SCCs, - SCCId SCC, const psr::LLVMBasedCallGraph &CG, - const Compressor &Functions); - -[[nodiscard]] inline ControlFlowOrder -computeCFGOrder(const llvm::Function *Fun) { - ControlFlowOrder Ret; - computeCFGOrder(Ret, Fun); +template CFGTy> +void computeCFGOrder(ControlFlowOrder &Into, const CFGTy &CF, const F &Fun) { + llvm::SmallDenseSet Seen; + + const auto Visit = [&](auto &Visit, ByConstRef Inst) { + if (!Seen.insert(Inst).second) { + return; + } + + scope_exit Push = [&]() { Into.Order.insert(Inst); }; + + 
if constexpr (IsBlockAwareControlFlow) { + if (auto Next = CF.getUniqueSuccessor(Inst)) { + Visit(Visit, unwrapNullable(Next)); + return; + } + } + for (const auto &Succ : CF.getSuccsOf(Inst)) { + Visit(Visit, Succ); + } + }; + + for (const auto &SP : CF.getStartPointsOf(Fun)) { + Visit(Visit, SP); + } +} + +template CFGTy> +[[nodiscard]] inline ControlFlowOrder computeCFGOrder(const CFGTy &CF, + const F &Fun) { + ControlFlowOrder Ret; + computeCFGOrder(Ret, CF, Fun); return Ret; } + } // namespace psr diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 9cd1ebc80f..9e2aa51af8 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -11,9 +11,13 @@ #include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" #include "phasar/Domain/AnalysisDomain.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/TypeTraits.h" #include +#include namespace psr::monoifds { @@ -21,15 +25,14 @@ template concept MonoIFDSAnalysisDomain = IsAnalysisDomain; template -concept MonoIFDSProblem = requires( +concept LocalMonoIFDSProblem = requires( T &Problem, DataFlowEnvironment &InOut, typename T::ProblemAnalysisDomain::n_t Inst, const typename T::ProblemAnalysisDomain::n_t &Fact, - const typename T::ProblemAnalysisDomain::f_t &Fun) { - typename T::ProblemAnalysisDomain; - requires MonoIFDSAnalysisDomain; - + const typename T::ProblemAnalysisDomain::f_t &Fun, + Compressor + &SeedCompressor) { Problem.normalFlow(InOut, Inst); Problem.callToRetFlow(InOut, Inst); { @@ -44,13 +47,27 @@ concept MonoIFDSProblem = requires( Problem.getZeroValue() } -> std::convertible_to; - Problem.initialSeeds(InOut, Fun); + Problem.initialSeeds(InOut, SeedCompressor, Fun); Problem.generateTaintsAtCall( Inst, Fun, [](const typename T::ProblemAnalysisDomain::d_t & GenFact) {}); + + 
Problem.generateTaints( + Inst, [](const typename T::ProblemAnalysisDomain::d_t & GenFact) {}); Problem.leakTaintsAtCall( Inst, Fun, [](const typename T::ProblemAnalysisDomain::d_t & LeakFact) {}); + Problem.leakTaints( + Inst, [](const typename T::ProblemAnalysisDomain::d_t & LeakFact) {}); Problem.onResult(Inst, Fact); }; + +template +concept MonoIFDSProblem = requires(T &Problem, SCCId CurrSCC, + std::pmr::memory_resource *MRes) { + typename T::ProblemAnalysisDomain; + requires MonoIFDSAnalysisDomain; + + { Problem.localAnalysis(CurrSCC, MRes) } -> LocalMonoIFDSProblem; +}; } // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 499d49b103..548352797f 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -24,15 +24,14 @@ #include "phasar/Utils/Nullable.h" #include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/TypedVector.h" -#include "phasar/Utils/UsedGlobalsHolder.h" + +#include "llvm/Support/Compiler.h" #include #include #include #include -#include - namespace psr::monoifds { template class MonoIFDSSolver { @@ -64,12 +63,6 @@ template class MonoIFDSSolver { return *this; } - MonoIFDSSolver & - setUsedGlobals(const UsedGlobalsHolder *UsedGlobals) & noexcept { - this->UsedGlobals = UsedGlobals; - return *this; - } - void solve(); private: @@ -88,24 +81,20 @@ template class MonoIFDSSolver { }; struct IntermediateState { + typename ProblemT::LocalAnalysis LocalProblem; node_hash_map> PathEdges; node_hash_map> Incoming; llvm::SmallDenseSet HasNewLeaks; llvm::SmallDenseSet HasNewSummary; - std::reference_wrapper< - const llvm::SmallDenseSet> - PermittedGlobals; SCCId CurrSCC; bool InRecursion; - IntermediateState(std::pmr::memory_resource *MRes, - const UsedGlobalsHolder &UsedGlobals, + IntermediateState(ProblemT *Problem, std::pmr::memory_resource *MRes, SCCId CurrSCC, bool InRecursion) - : 
PathEdges(MRes), Incoming(MRes), - PermittedGlobals(std::cref(UsedGlobals.GlobsPerSCC[CurrSCC])), - CurrSCC(CurrSCC), InRecursion(InRecursion) {} + : LocalProblem(Problem->localAnalysis(CurrSCC, MRes)), PathEdges(MRes), + Incoming(MRes), CurrSCC(CurrSCC), InRecursion(InRecursion) {} }; struct Mapper { @@ -124,7 +113,7 @@ template class MonoIFDSSolver { ComputedMappings.clear(); } - const SourceFactSet &getSourceFactsFor(auto &Solver, + const SourceFactSet &getSourceFactsFor(auto &LocalProblem, const DataFlowEnvironment &In, const FunctionSummary &CalleeSum, SourceFactId CalleeSrc, @@ -132,7 +121,7 @@ template class MonoIFDSSolver { auto &Ret = Mapping[CalleeSrc]; if (ComputedMappings.tryInsert(CalleeSrc)) { - auto &&CSFacts = Solver.Problem->invReturnFlow( + auto &&CSFacts = LocalProblem.invReturnFlow( CallInst, CalleeSum.SourceFactIds[CalleeSrc]); for (const auto *Fact : CSFacts) { @@ -145,24 +134,25 @@ template class MonoIFDSSolver { return Ret; } - void insertAllSrcFactsFor(SourceFactSet &Into, auto &Solver, + void insertAllSrcFactsFor(SourceFactSet &Into, auto &LocalProblem, const DataFlowEnvironment &In, const FunctionSummary &CalleeSum, const SourceFactSet &CalleeSrcs, ByConstRef CallInst) { CalleeSrcs.foreach ([&](auto SrcFactId) { - Into.insertAllOf( - getSourceFactsFor(Solver, In, CalleeSum, SrcFactId, CallInst)); + Into.insertAllOf(getSourceFactsFor(LocalProblem, In, CalleeSum, + SrcFactId, CallInst)); }); } [[nodiscard]] SourceFactSet - getAllSrcFactsFor(auto &Solver, const DataFlowEnvironment &In, + getAllSrcFactsFor(auto &LocalProblem, const DataFlowEnvironment &In, const FunctionSummary &CalleeSum, const SourceFactSet &CalleeSrcs, ByConstRef CallInst) { SourceFactSet Ret; - insertAllSrcFactsFor(Ret, Solver, In, CalleeSum, CalleeSrcs, CallInst); + insertAllSrcFactsFor(Ret, LocalProblem, In, CalleeSum, CalleeSrcs, + CallInst); return Ret; } }; @@ -171,7 +161,7 @@ template class MonoIFDSSolver { llvm::ArrayRef CurrFuns) { const size_t SCCSize = 
CurrFuns.size(); const bool InRecursion = SCCSize > 1; - IntermediateState IState(&PoolRes, *UsedGlobals, CurrSCC, InRecursion); + IntermediateState IState(&PoolRes, CurrSCC, InRecursion); const auto IterStrategy = Config.IterStrategy; const bool UseTopoFixpointDriver = [=] { @@ -187,10 +177,11 @@ template class MonoIFDSSolver { return true; }(); - ControlFlowOrder CFO; + ControlFlowOrder CFO; if (UseTopoFixpointDriver) { - // TODO: implement computeCFGOrder() - computeCFGOrder(CFO, SCCs, CurrSCC, *ICF, Functions); + for (const auto &Fun : CurrFuns) { + computeCFGOrder(CFO, *ICF, Fun); + } } ArraySetDriver DefaultDriver; @@ -201,7 +192,7 @@ template class MonoIFDSSolver { for (auto FunId : llvm::reverse(CurrFuns)) { const auto *Fun = (*Functions)[FunId]; submitInitialSeeds(IState, Driver, Summaries[FunId].SourceFactIds, - Fun); + Fun, CurrSCC); } Driver.run([&](n_t BlockStart) { analyzeBlock(IState, Driver, BlockStart); @@ -222,9 +213,9 @@ template class MonoIFDSSolver { llvm::errs() << '.'; } - ITST_ASSERT(IState.HasNewSummary.empty(), - "After repropagating, we should not have any summary " - "applications pending"); + assert(IState.HasNewSummary.empty() && + "After repropagating, we should not have any summary " + "applications pending"); }; if (UseTopoFixpointDriver) { ComputeFixpointWithDriver(TopoDriver); @@ -245,10 +236,10 @@ template class MonoIFDSSolver { void submitInitialSeeds(IntermediateState &IState, auto &Driver, Compressor &SeedCompressor, - ByConstRef Fun) { + ByConstRef Fun, SCCId CurrSCC) { const auto &SPs = ICF->getStartPointsOf(Fun); - const auto &Zero = Problem->getZeroValue(); + const auto &Zero = IState.LocalProblem.getZeroValue(); SeedCompressor.insert(Zero); assert(SeedCompressor.get(Zero) == SourceFactId(0) && "The Zero value must always have Id 0!"); @@ -257,7 +248,7 @@ template class MonoIFDSSolver { auto &SeedState = IState.PathEdges[SP]; SeedState[Zero].insert(SourceFactId(0)); - Problem->initialSeeds(SeedState, Fun); + 
IState.LocalProblem.initialSeeds(SeedState, SeedCompressor, Fun, CurrSCC); Driver.push(SP); } } @@ -321,7 +312,8 @@ template class MonoIFDSSolver { const auto &In = getOrDefault(IState.PathEdges, CS); for (const auto &[CalleeLeak, LeakSrc] : Sum.LeakIf) { - const auto &CSSrc = M.getAllSrcFactsFor(*this, In, Sum, LeakSrc, CS); + const auto &CSSrc = + M.getAllSrcFactsFor(IState.LocalProblem, In, Sum, LeakSrc, CS); reportOrPropagateLeak(IState, *CallerId, CalleeLeak.first, CalleeLeak.second, CSSrc); } @@ -467,7 +459,7 @@ template class MonoIFDSSolver { return analyzeExitInst(IState, LocalState, CurrFunId, Inst); } - Problem->normalFlow(LocalState, Inst); + IState.LocalProblem.normalFlow(LocalState, Inst); } void analyzeExitInst(IntermediateState &IState, @@ -527,7 +519,7 @@ template class MonoIFDSSolver { CalleeId, Inst, CurrFunId); } if (CSInfo.CanCTR) { - Problem->callToRetFlow(LocalState, Inst); + IState.LocalProblem.callToRetFlow(LocalState, Inst); } mergeStates(LocalState, std::move(CollectedSummary)); @@ -541,13 +533,13 @@ template class MonoIFDSSolver { Mapper M(Sum.SourceFactIds.size()); for (const auto &[SumFact, SumSrc] : Sum.EndSummary) { - auto &&RetFacts = Problem->returnFlow(Inst, SumFact); + auto &&RetFacts = IState.LocalProblem.returnFlow(Inst, SumFact); if (RetFacts.empty()) { continue; } const auto &RetSrcFacts = - M.getAllSrcFactsFor(*this, In, Sum, SumSrc, Inst); + M.getAllSrcFactsFor(IState.LocalProblem, In, Sum, SumSrc, Inst); if (RetSrcFacts.empty()) { continue; } @@ -559,7 +551,8 @@ template class MonoIFDSSolver { if (CalleeId != CurrFunId) { // Prevent self-insertion for (const auto &[CalleeLeak, LeakSrc] : Sum.LeakIf) { - const auto &CSSrc = M.getAllSrcFactsFor(*this, In, Sum, LeakSrc, Inst); + const auto &CSSrc = + M.getAllSrcFactsFor(IState.LocalProblem, In, Sum, LeakSrc, Inst); reportOrPropagateLeak(IState, CurrFunId, CalleeLeak.first, CalleeLeak.second, CSSrc); } @@ -592,14 +585,16 @@ template class MonoIFDSSolver { 
IState.Incoming[CalleeFun].insert(Inst); } - Problem->leakTaintsAtCall(Inst, CalleeFun, [&](ByConstRef LeakFact) { - if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { - reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); - } - }); + IState.LocalProblem.leakTaintsAtCall( + Inst, CalleeFun, [&](ByConstRef LeakFact) { + if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { + reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, + *LeakSrc); + } + }); // Generate taints from zero: - Problem->generateTaintsAtCall( + IState.LocalProblem.generateTaintsAtCall( Inst, CalleeFun, [&](ByConstRef GenFact) { // Note: Assume, this gets called for all relevant aliases as well LocalState[GenFact].insert(SourceFactId(0)); @@ -612,6 +607,21 @@ template class MonoIFDSSolver { }; } + void handleSourceSinkConfig(IntermediateState &IState, + DataFlowEnvironment &LocalState, + FunctionId CurrFunId, n_t Inst) { + IState.LocalProblem.leaksTaint(Inst, [&](const auto &LeakFact) { + if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { + reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); + } + }); + + // Generate taints from zero: + IState.LocalProblem.generateTaints(Inst, [&](const auto &GenFact) { + LocalState[GenFact].insert(SourceFactId(0)); + }); + } + void rescheduleCallsAtExit(IntermediateState &IState, auto &Driver, FunctionId CurrFunId) { const auto &Fun = (*Functions)[CurrFunId]; @@ -634,7 +644,7 @@ template class MonoIFDSSolver { // The zero fact has always Id 0! 
if (From.tryErase(SourceFactId(0))) { if (Leaks[LeakInst].insert(LeakFact).second) { - Problem->onResult(LeakInst, LeakFact); + IState.LocalProblem.onResult(LeakInst, LeakFact); } } @@ -702,7 +712,6 @@ template class MonoIFDSSolver { MaybeUniquePtr> SCCs{}; MaybeUniquePtr> Functions{}; - MaybeUniquePtr> UsedGlobals{}; // --- global analysis state TypedVector Summaries{}; diff --git a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h index 32ddc84d32..7fd06988d1 100644 --- a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h +++ b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h @@ -10,19 +10,22 @@ *****************************************************************************/ #include "phasar/ControlFlow/ControlFlowOrder.h" +#include "phasar/Utils/ByRef.h" + +#include "llvm/ADT/BitVector.h" namespace psr::monoifds { -/// LooSee SootUp's +/// See SootUp's /// [UniversePriorityQueue](https://github.com/soot-oss/SootUp/blob/develop/sootup.analysis.intraprocedural/src/main/java/sootup/analysis/intraprocedural/UniverseSortedPriorityQueue.java) template class TopoFixpointDriver { public: TopoFixpointDriver() noexcept = default; - TopoFixpointDriver(ControlFlowOrder &&CFO) : CFO(std::move(CFO)) { + TopoFixpointDriver(ControlFlowOrder &&CFO) : CFO(std::move(CFO)) { WorkList.resize(this->CFO.Order.size()); } - void push(ItemT Item) { + void push(ByConstRef Item) { auto IId = CFO.Order.get(Item); WorkList.set(uint32_t(IId)); @@ -36,7 +39,7 @@ template class TopoFixpointDriver { return std::nullopt; } - auto IId = ControlFlowOrder::CFGOrderId(Max); + auto IId = ControlFlowOrder::CFGOrderId(Max); Max = WorkList.find_prev(Max); return CFO.Order[IId]; } @@ -52,7 +55,7 @@ template class TopoFixpointDriver { [[nodiscard]] constexpr const auto &getCFO() const noexcept { return CFO; } private: - ControlFlowOrder CFO; + ControlFlowOrder CFO; llvm::BitVector WorkList; int Max = -1; }; diff --git 
a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h new file mode 100644 index 0000000000..2f43603a1d --- /dev/null +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h @@ -0,0 +1,61 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/HashUtils.h" +#include "phasar/Utils/UsedGlobalsHolder.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/SmallVector.h" + +#include + +namespace llvm { +class Function; +class Value; +class GlobalVariable; +class Instruction; +} // namespace llvm + +namespace psr::monoifds { + +class AliasCache { +public: + // Passed AI should already be FilteredAliasSet or similar + explicit AliasCache( + LLVMAliasIteratorRef AI [[clang::lifetime_capture_by(this)]], + llvm::function_ref SkipSeedsCallBack + [[clang::lifetime_capture_by(this)]], + const UsedGlobalsHolder::GlobalSet + *PermittedGlobals [[clang::lifetime_capture_by(this)]], + std::pmr::memory_resource *MRes [[clang::lifetime_capture_by(this)]]) + : AI(AI), SkipSeedsCallBack(SkipSeedsCallBack), + PermittedGlobals(&assertNotNull(PermittedGlobals)), + Cache(&assertNotNull(MRes)) {} + + [[nodiscard]] llvm::ArrayRef + getAliasSet(const llvm::Value *Fact, const llvm::Instruction *At); + +private: + // NOTE: Used the node_hash_map from + // [parallel-hash-map](https://github.com/greg7mdp/parallel-hashmap) here + // for the paper-eval! 
+ using node_hash_map = std::pmr::unordered_map< + std::pair, + llvm::SmallVector, PairHash>; + + LLVMAliasIteratorRef AI; + llvm::function_ref SkipSeedsCallBack; + const UsedGlobalsHolder::GlobalSet + *PermittedGlobals{}; + node_hash_map Cache; +}; +} // namespace psr::monoifds diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h new file mode 100644 index 0000000000..da306094e2 --- /dev/null +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -0,0 +1,105 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" +#include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" +#include "phasar/PhasarLLVM/Utils/LLVMAnalysisPrinter.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/MaybeUniquePtr.h" +#include "phasar/Utils/NullAnalysisPrinter.h" +#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/UsedGlobalsHolder.h" +#include "phasar/Utils/Utilities.h" + +#include "llvm/ADT/STLFunctionalExtras.h" + +#include + +namespace psr::monoifds { +class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { +public: + using ProblemAnalysisDomain = 
LLVMIFDSAnalysisDomainDefault; + + TaintAnalysis( + const LLVMTaintConfig *Config, + const UsedGlobalsHolder *UsedGlobals, + LLVMAliasIteratorRef AI) + : Config(&assertNotNull(Config)), + UsedGlobals(&assertNotNull(UsedGlobals)), AI(AI) {} + + void setAnalysisPrinter( + MaybeUniquePtr> P) { + if (P) { + Printer = std::move(P); + } else { + Printer = NullAnalysisPrinter::getInstance(); + } + } + + struct LocalAnalysis { + TaintAnalysis *TA{}; + AliasCache AC; + SCCId CurrSCC; + + void normalFlow(DataFlowEnvironment &InOut, n_t Curr); + void callToRetFlow(DataFlowEnvironment &InOut, n_t Curr); + [[nodiscard]] llvm::SmallVector returnFlow(n_t CallSite, d_t Fact); + [[nodiscard]] llvm::SmallVector invReturnFlow(n_t CallSite, d_t Fact); + + [[nodiscard]] d_t getZeroValue() const { + return LLVMZeroValue::getInstance(); + } + + void initialSeeds(DataFlowEnvironment &SeedState, + Compressor &SeedCompressor, f_t Fun); + + void generateTaintsAtCall(n_t CS, f_t Callee, + llvm::function_ref GenFact); + void generateTaints(n_t CS, llvm::function_ref GenFact) { + // XXX: Implement (was not necessary for paper eval) + } + void leakTaintsAtCall(n_t CS, f_t Callee, + llvm::function_ref LeakFact); + void leakTaints(n_t CS, llvm::function_ref LeakFact) { + // XXX: Implement (was not necessary for paper eval) + } + void onResult(n_t Inst, d_t Fact) { + TA->Printer->onResult(Inst, Fact, + DataFlowAnalysisType::IFDSTaintAnalysis); + } + }; + + [[nodiscard]] LocalAnalysis localAnalysis(SCCId CurrSCC, + std::pmr::memory_resource *MRes) { + return LocalAnalysis{ + .TA = this, + .AC = AliasCache(AI, Config->getRegisteredSkipSeedsCallBack(), + &UsedGlobals->GlobsPerSCC[CurrSCC], MRes), + .CurrSCC = CurrSCC, + }; + } + + // TODO: shouldBeInSummary() + +private: + MaybeUniquePtr> Printer = + std::make_unique>(); + const LLVMTaintConfig *Config{}; + const UsedGlobalsHolder *UsedGlobals{}; + LLVMAliasIteratorRef AI; +}; +} // namespace psr::monoifds diff --git 
a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h index d3473739af..2a4814f6dc 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h @@ -40,6 +40,7 @@ template class TaintConfigBase { using TaintDescriptionCallBackTy = llvm::unique_function(n_t) const>; + using SkipSeedsCallBackTy = llvm::unique_function; enum class [[clang::flag_enum]] SeedConfig { Arguments = 1, @@ -57,6 +58,9 @@ template class TaintConfigBase { void registerSanitizerCallBack(TaintDescriptionCallBackTy CB) noexcept { SanitizerCallBack = std::move(CB); } + void registerSkipSeedsCallBack(SkipSeedsCallBackTy CB) noexcept { + SkipSeedsCallBack = std::move(CB); + } [[nodiscard]] const TaintDescriptionCallBackTy & getRegisteredSourceCallBack() const noexcept { @@ -70,6 +74,10 @@ template class TaintConfigBase { getRegisteredSanitizerCallBack() const noexcept { return SanitizerCallBack; } + [[nodiscard]] const SkipSeedsCallBackTy & + getRegisteredSkipSeedsCallBack() const noexcept { + return SkipSeedsCallBack; + } [[nodiscard]] bool isSource(v_t Val) const { return self().isSourceImpl(std::move(Val)); @@ -81,6 +89,10 @@ template class TaintConfigBase { return self().isSanitizerImpl(std::move(Val)); } + [[nodiscard]] bool skipSeed(v_t Val) const { + return SkipSeedsCallBack && SkipSeedsCallBack(Val); + } + /// \brief Calls Handler for all operands of Inst (maybe including Inst /// itself) that are generated unconditionally as tainted. 
/// @@ -161,6 +173,7 @@ template class TaintConfigBase { TaintDescriptionCallBackTy SourceCallBack{}; TaintDescriptionCallBackTy SinkCallBack{}; TaintDescriptionCallBackTy SanitizerCallBack{}; + SkipSeedsCallBackTy SkipSeedsCallBack{}; }; //===----------------------------------------------------------------------===// diff --git a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h index 1e0f48e6dd..797d460938 100644 --- a/include/phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h +++ b/include/phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h @@ -12,7 +12,9 @@ #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/TypeTraits.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -22,6 +24,28 @@ #include namespace psr { + +void forallGeneratedFacts( + const LLVMTaintConfig &Config, const llvm::CallBase *CB, + const llvm::Function *Callee, + std::invocable auto WithGenFact) { + const auto &Callback = Config.getRegisteredSourceCallBack(); + if (Callback) { + auto CBFacts = Callback(CB); + llvm::for_each(CBFacts, WithGenFact); + } + + if (Config.isSource(CB)) { + std::invoke(WithGenFact, CB); + } + + for (unsigned I = 0, End = Callee->arg_size(); I < End; ++I) { + if (Config.isSource(Callee->getArg(I))) { + std::invoke(WithGenFact, CB->getArgOperand(I)); + } + } +} + template requires std::is_same_v void collectGeneratedFacts(ContainerTy &Dest, const LLVMTaintConfig &Config, @@ -44,6 +68,26 @@ void collectGeneratedFacts(ContainerTy &Dest, const LLVMTaintConfig &Config, } } +template LeakIfFn = TrueFn> +void forallLeakedFacts(const LLVMTaintConfig &Config, const llvm::CallBase *CB, + const llvm::Function *Callee, + std::invocable auto WithLeakFact, + LeakIfFn LeakIf = {}) { + + const auto &Callback = Config.getRegisteredSinkCallBack(); + if (Callback) { + auto CBLeaks 
= Callback(CB); + llvm::for_each(llvm::make_filter_range(CBLeaks, LeakIf), WithLeakFact); + } + + for (unsigned I = 0, End = Callee->arg_size(); I < End; ++I) { + if (Config.isSink(Callee->getArg(I)) && + std::invoke(LeakIf, CB->getArgOperand(I))) { + std::invoke(WithLeakFact, CB->getArgOperand(I)); + } + } +} + template requires std::is_same_v void collectLeakedFacts(ContainerTy &Dest, const LLVMTaintConfig &Config, diff --git a/include/phasar/Pointer/AliasIterator.h b/include/phasar/Pointer/AliasIterator.h index 1e139c88f0..00511ab6ae 100644 --- a/include/phasar/Pointer/AliasIterator.h +++ b/include/phasar/Pointer/AliasIterator.h @@ -127,8 +127,13 @@ class [[gsl::Pointer]] AliasIteratorRef : private TypeErasureUtils { template > [[nodiscard]] SetT asSet(ByConstRef Of, ByConstRef At) { SetT Set; - forallAliasesOf(Of, At, - [&Set](v_t Alias) { Set.insert(std::move(Alias)); }); + forallAliasesOf(Of, At, [&Set](v_t Alias) { + if constexpr (requires() { Set.push_back(std::move(Alias)); }) { + Set.push_back(std::move(Alias)); + } else { + Set.insert(std::move(Alias)); + } + }); return Set; } diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index e634382eb0..5d18f72bc9 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -211,10 +211,9 @@ template struct [[deprecated("getAsJson should not be used anymore. Use printAsJson " "instead")]] has_getAsJson : std::false_type {}; // NOLINT template -struct [[deprecated( - "getAsJson should not be used anymore. Use printAsJson " - "instead")]] has_getAsJson() - .getAsJson())>> +struct [[deprecated("getAsJson should not be used anymore. 
Use printAsJson " + "instead")]] +has_getAsJson().getAsJson())>> : std::true_type {}; // NOLINT struct TrueFn { diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 29d75fb21b..113cc0419f 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -14,6 +14,7 @@ if(BUILD_PHASAR_CLANG) if(PHASAR_IN_TREE) add_dependencies(phasar_interface intrinsics_gen) endif() + add_subdirectory(PhasarClang) endif() @@ -37,6 +38,7 @@ set(PHASAR_LINK_LIBS phasar_llvm_mono phasar_llvm_pathsensitivity phasar_llvm_dataflow + phasar_llvm_monoifds phasar_llvm_db phasar_llvm_domain phasar_llvm_pointer @@ -50,12 +52,15 @@ set(PHASAR_LINK_LIBS phasar_typehierarchy phasar_utils ) + if(BUILD_PHASAR_CLANG) list(APPEND PHASAR_LINK_LIBS phasar_clang) endif() + if(TARGET phasar_db) list(APPEND PHASAR_LINK_LIBS phasar_db) endif() + if(PHASAR_USE_SVF) list(APPEND PHASAR_LINK_LIBS phasar_llvm_pointer_svf) endif() diff --git a/lib/ControlFlow/ControlFlowOrder.cpp b/lib/ControlFlow/ControlFlowOrder.cpp deleted file mode 100644 index 72fc5b1ae9..0000000000 --- a/lib/ControlFlow/ControlFlowOrder.cpp +++ /dev/null @@ -1,182 +0,0 @@ -#include "phasar/ControlFlow/ControlFlowOrder.h" - -#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" -#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/ArraySet.h" -#include "phasar/Utils/FunctionCompressor.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" - -using namespace psr; - -void psr::computeCFGOrder(ControlFlowOrder &Into, const llvm::Function *Fun) { - - llvm::SmallDenseSet Seen; - - const auto Visit = [&, CFG = psr::LLVMBasedCFG()]( - auto &Visit, const llvm::Instruction *Inst) { - if (!Seen.insert(Inst).second) { - return; - } - - const auto *Next = Inst->getNextNonDebugInstruction(); - if (Next) { - Visit(Visit, Next); - } else { - for (const 
auto *Succ : CFG.getSuccsOf(Inst)) { - Visit(Visit, Succ); - } - } - - Into.Order.insert(Inst); - }; - - if (!Fun->isDeclaration()) { - Visit(Visit, &Fun->getEntryBlock().front()); - } -} - -constexpr static auto psrGetExitPoints(const auto *Fun) { - if constexpr (requires() { psr::getAllExitPoints(Fun, true); }) { - return psr::getAllExitPoints(Fun, /*IncludeResume=*/true); - } else { - return psr::getAllExitPoints(Fun); - } -} - -static llvm::SmallDenseMap -computeDistanceToRet(llvm::ArrayRef Funs, - const SCCHolder &SCCs, SCCId SCC, - const FunctionCompressor &Functions, - const psr::LLVMBasedCallGraph &CG) { - llvm::SmallDenseMap Ret; - - ArraySet WL; - - for (auto FunId : Funs) { - const auto *Fun = Functions[FunId]; - for (const auto *ExitInst : psrGetExitPoints(Fun)) { - WL.insert(ExitInst); - Ret[ExitInst] = 0; - } - - WL.foreach ([&, CFG = psr::LLVMBasedCFG()](const auto *Inst) { - const auto Dist = Ret[Inst]; - const auto NextDist = [&] { - size_t NextDist = Dist + 1; - if (llvm::isa(Inst)) { - for (const auto *Callee : CG.getCalleesOfCallAt(Inst)) { - const auto CalleeId = Functions.getOrNull(Callee); - if (CalleeId && SCCs.SCCOfNode[*CalleeId] == SCC) { - // some heuristics... 
- NextDist += 100; - } - } - } - return NextDist; - }(); - - for (const auto *Pred : CFG.getPredsOf(Inst)) { - auto [It, Inserted] = Ret.try_emplace(Pred, NextDist); - if (!Inserted && It->second > NextDist) { - It->second = NextDist; - Inserted = true; - } - - if (Inserted) { - WL.insert(Pred); - } - } - }); - } - - return Ret; -} - -void psr::computeCFGOrder(ControlFlowOrder &Into, - const SCCHolder &SCCs, - SCCId SCC, - const psr::LLVMBasedCallGraph &CG, - const FunctionCompressor &Functions) { - llvm::SmallDenseSet Seen; - - const auto &Funs = SCCs.NodesInSCC[SCC]; - - const auto DistToRet = computeDistanceToRet(Funs, SCCs, SCC, Functions, CG); - - const auto Visit = [&, CFG = psr::LLVMBasedCFG()]( - auto &Visit, const llvm::Instruction *Inst) { - if (!Seen.insert(Inst).second) { - return; - } - - if (llvm::isa(Inst)) { - for (const auto *CS : CG.getCallersOf(Inst->getFunction())) { - const auto *Caller = CS->getFunction(); - const auto CallerId = Functions.getOrNull(Caller); - if (CallerId && SCCs.SCCOfNode[*CallerId] == SCC) { - Visit(Visit, CS); - } - } - } - - const auto *Next = Inst->getNextNonDebugInstruction(); - if (Next) { - Visit(Visit, Next); - } else { - auto Succs = CFG.getSuccsOf(Inst); - llvm::sort(Succs, [&](const auto *I1, const auto *I2) { - return DistToRet.lookup(I1) < DistToRet.lookup(I2); - }); - for (const auto *Succ : Succs) { - Visit(Visit, Succ); - } - } - - Into.Order.insert(Inst); - }; - - assert(!Funs.empty()); - - const llvm::Function *LargestFun = nullptr; - size_t LargestSz = 0; - for (auto FunId : Funs) { - const auto *Fun = Functions[FunId]; - if (Fun->isDeclaration()) { - continue; - } - - const auto Callers = CG.getCallersOf(Fun); - const auto *OutsideCaller = llvm::find_if(Callers, [&](const auto *Caller) { - const auto CallerId = Functions.getOrNull(Caller->getFunction()); - return CallerId && SCCs.SCCOfNode[*CallerId] != SCC; - }); - if (OutsideCaller == Callers.end()) { - continue; - } - - const auto FunSz = Fun->size(); 
- if (FunSz > LargestSz) { - LargestFun = Fun; - LargestSz = FunSz; - } - } - - assert(!LargestFun || !LargestFun->isDeclaration()); - if (LargestFun) { - Visit(Visit, &LargestFun->getEntryBlock().front()); - } - - for (auto FunId : Funs) { - const auto *Fun = Functions[FunId]; - if (!Fun->isDeclaration()) { - Visit(Visit, &Fun->getEntryBlock().front()); - } - } -} diff --git a/lib/PhasarLLVM/DataFlow/CMakeLists.txt b/lib/PhasarLLVM/DataFlow/CMakeLists.txt index 06e922a93e..41f3877cf9 100644 --- a/lib/PhasarLLVM/DataFlow/CMakeLists.txt +++ b/lib/PhasarLLVM/DataFlow/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(IfdsIde) add_subdirectory(Mono) add_subdirectory(PathSensitivity) +add_subdirectory(MonoIfds) add_phasar_library(phasar_llvm_dataflow LINKS diff --git a/lib/PhasarLLVM/DataFlow/DataFlow.cppm b/lib/PhasarLLVM/DataFlow/DataFlow.cppm index 5a769f0546..cb5dacca29 100644 --- a/lib/PhasarLLVM/DataFlow/DataFlow.cppm +++ b/lib/PhasarLLVM/DataFlow/DataFlow.cppm @@ -4,4 +4,5 @@ export module phasar.llvm.dataflow; export import phasar.llvm.dataflow.ifdside; export import phasar.llvm.dataflow.mono; +export import phasar.llvm.dataflow.monoifds; export import phasar.llvm.dataflow.pathsensitivity; diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp new file mode 100644 index 0000000000..d031b9c656 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp @@ -0,0 +1,35 @@ +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" + +#include "llvm/IR/Instruction.h" + +using namespace psr; + +llvm::ArrayRef +monoifds::AliasCache::getAliasSet(const llvm::Value *Fact, + const llvm::Instruction *At) { + const auto *AtFun = At->getFunction(); + auto [It, Inserted] = Cache.try_emplace(std::make_pair(AtFun, Fact)); + static size_t Misses = 0; + static size_t Accesses = 0; + Accesses++; + if (Inserted) { + Misses++; + AI.forallAliasesOf(Fact, At, [this, &Vec = It->second](const auto *Alias) { + const auto 
*AliasBase = Alias->stripPointerCastsAndAliases(); + if (const auto *Glob = llvm::dyn_cast(AliasBase); + Glob && !PermittedGlobals->contains(Glob)) { + return; + } + if (!SkipSeedsCallBack || !SkipSeedsCallBack(Alias)) { + Vec.push_back(Alias); + } + }); + } + + static psr::scope_exit PrintStats = [] { + llvm::errs() << "AliasCache: Accesses: " << Accesses + << "\n> Misses: " << Misses << '\n'; + }; + + return It->second; +} diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt b/lib/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt new file mode 100644 index 0000000000..c5b5238838 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt @@ -0,0 +1,22 @@ +file(GLOB_RECURSE MONOIFDS_SRC *.h *.cpp) + +add_phasar_library(phasar_llvm_monoifds + ${MONOIFDS_SRC} + + LINKS + phasar_config + phasar_utils + phasar_llvm_pointer + phasar_llvm_typehierarchy + phasar_llvm_controlflow + phasar_llvm_utils + phasar_taintconfig + + LLVM_LINK_COMPONENTS + Core + Support + Demangle + + MODULE_FILES + MonoIfds.cppm +) diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp new file mode 100644 index 0000000000..05e30c5aa2 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -0,0 +1,293 @@ +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" + +#include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/MapUtils.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/IR/InstrTypes.h" + +#include +#include +#include +#include +#include + +using namespace psr; +using namespace 
psr::monoifds; + +using d_t = monoifds::TaintAnalysis::d_t; + +static void insertOrAssign(DataFlowEnvironment &Into, auto &&Key, + auto &&Value) { + auto [It, Inserted] = Into.try_emplace(PSR_FWD(Key), PSR_FWD(Value)); + if (!Inserted) { + It->second = PSR_FWD(Value); + } +} + +static void generateFlow(DataFlowEnvironment &InOut, const llvm::Value *To, + const llvm::Value *From) { + assert(To != From); + + // safety copy + if (auto PtrSrc = getOrDefault(InOut, From); !PtrSrc.empty()) { + // TODO: Strong update + + auto [It, Inserted] = InOut.try_emplace(To, std::move(PtrSrc)); + if (!Inserted) { + It->second.insertAllOf(PtrSrc); + } + + if (llvm::isa(From) && !From->hasNUsesOrMore(2)) { + InOut.erase(From); + } + } +} + +static void handleStore(DataFlowEnvironment &InOut, + const llvm::StoreInst *Store, AliasCache &AC) { + const auto *ValueOp = Store->getValueOperand(); + const auto *PointerOp = Store->getPointerOperand(); + + if (auto ValueSrc = getOrDefault(InOut, ValueOp); !ValueSrc.empty()) { + bool KillValue = + llvm::isa(ValueOp) && !ValueOp->hasNUsesOrMore(2); + + auto Aliases = AC.getAliasSet(Store->getPointerOperand(), Store); + + for (const auto *Alias : Aliases) { + if (Alias == PointerOp) { + continue; + } + if (Alias == ValueOp) { + KillValue = false; + continue; + } + + auto [It, Inserted] = InOut.try_emplace(Alias, ValueSrc); + if (!Inserted) { + It->second.insertAllOf(ValueSrc); + } + } + + insertOrAssign(InOut, PointerOp, std::move(ValueSrc)); + + if (KillValue) { + InOut.erase(ValueOp); + } + + } else { + InOut.erase(PointerOp); + } +} + +void monoifds::TaintAnalysis::LocalAnalysis::normalFlow( + DataFlowEnvironment &InOut, n_t Curr) { + if (const auto *Store = llvm::dyn_cast(Curr)) { + return handleStore(InOut, Store, AC); + } + + if (const auto *Load = llvm::dyn_cast(Curr)) { + return generateFlow(InOut, Load, Load->getPointerOperand()); + } + + if (const auto *GEP = llvm::dyn_cast(Curr)) { + return generateFlow(InOut, GEP, 
GEP->getPointerOperand()); + } + + if (const auto *Cast = llvm::dyn_cast(Curr)) { + return generateFlow(InOut, Cast, Cast->getOperand(0)); + } + + if (const auto *Extract = llvm::dyn_cast(Curr)) { + return generateFlow(InOut, Extract, Extract->getOperand(0)); + } + + for (const auto *Op : Curr->operand_values()) { + if (Op->hasOneUser()) { + InOut.erase(Op); + } + } + + // Otherwise we do not care and leave everything as it is +} + +void monoifds::TaintAnalysis::LocalAnalysis::callToRetFlow( + DataFlowEnvironment &InOut, n_t Curr) { + for (const auto &Arg : llvm::cast(Curr)->args()) { + if (Arg->getType()->isPointerTy()) { + InOut.erase(Arg); + } + } +} + +auto monoifds::TaintAnalysis::LocalAnalysis::returnFlow(n_t CallSite, d_t Fact) + -> llvm::SmallVector { + if (llvm::isa(Fact)) { + // Pass global variables as is, if desired + // Globals could also be actual arguments, then the formal argument + // needs to be generated below. Need llvm::Constant here to cover also + // ConstantExpr and ConstantAggregate + return {Fact}; + } + + const auto *Call = llvm::cast(CallSite); + if (const auto *Arg = llvm::dyn_cast(Fact)) { + auto ArgNo = Arg->getArgNo(); + + if (ArgNo >= Call->arg_size()) { + llvm::report_fatal_error("Invalid Argument: Arg " + llvm::Twine(ArgNo) + + " at call with " + + llvm::Twine(Call->arg_size()) + + " arguments: " + psr::llvmIRToString(Call) + + " --- " + psr::llvmIRToString(Arg)); + } + assert(ArgNo < Call->arg_size()); + + const auto *ActualArg = Call->getArgOperand(ArgNo); + + if (llvm::isa(ActualArg) || + llvm::isa(ActualArg)) { + return {}; + } + + return llvm::to_vector(AC.getAliasSet(ActualArg, Call)); + } + + if (Call->getFunctionType()->isVarArg()) { + if (const auto *Alloca = llvm::dyn_cast(Fact); + Alloca && isVaListAlloca(*Alloca)) { + llvm::SmallVector Ret; + + auto NumParams = Call->getFunctionType()->getNumParams(); + for (const auto &Arg : llvm::drop_begin(Call->args(), NumParams)) { + if (llvm::isa(Arg.get()) || + 
llvm::isa(Arg.get())) { + continue; + } + if (Arg->getType()->isPointerTy()) { + auto Aliases = AC.getAliasSet(Arg, Call); + Ret.append(Aliases.begin(), Aliases.end()); + } + } + + return Ret; + } + } + + // Everything else that has been found worthy to be mapped back must be a + // return value + return {CallSite}; +} + +auto monoifds::TaintAnalysis::LocalAnalysis::invReturnFlow(n_t CallSite, + d_t Fact) + -> llvm::SmallVector { + if (llvm::isa(Fact)) { + // Pass global variables as is, if desired + // Globals could also be actual arguments, then the formal argument + // needs to be generated below. Need llvm::Constant here to cover also + // ConstantExpr and ConstantAggregate + return {Fact}; + } + + const auto *Call = llvm::cast(CallSite); + if (const auto *Arg = llvm::dyn_cast(Fact)) { + auto ArgNo = Arg->getArgNo(); + + if (ArgNo >= Call->arg_size()) { + llvm::report_fatal_error("Invalid Argument: Arg " + llvm::Twine(ArgNo) + + " at call with " + + llvm::Twine(Call->arg_size()) + + " arguments: " + psr::llvmIRToString(Call) + + " --- " + psr::llvmIRToString(Arg)); + } + assert(ArgNo < Call->arg_size()); + + const auto *ActualArg = Call->getArgOperand(ArgNo); + + if (llvm::isa(ActualArg) || + llvm::isa(ActualArg)) { + return {}; + } + + return {ActualArg}; + } + + if (Call->getFunctionType()->isVarArg()) { + if (const auto *Alloca = llvm::dyn_cast(Fact); + Alloca && isVaListAlloca(*Alloca)) { + llvm::SmallVector Ret; + + auto NumParams = Call->getFunctionType()->getNumParams(); + for (const auto &Arg : llvm::drop_begin(Call->args(), NumParams)) { + if (llvm::isa(Arg.get()) || + llvm::isa(Arg.get())) { + continue; + } + if (Arg->getType()->isPointerTy()) { + Ret.push_back(Arg); + } + } + return Ret; + } + } + + // Everything else that has been found worthy to be mapped back must be a + // return value + return {CallSite}; +} + +void monoifds::TaintAnalysis::LocalAnalysis::initialSeeds( + DataFlowEnvironment &SeedState, + Compressor &SeedCompressor, f_t Fun) { + 
+ for (const auto &Arg : Fun->args()) { + if (Arg.hasStructRetAttr() || + Arg.hasAttribute(llvm::Attribute::WriteOnly) || + !Arg.hasNUsesOrMore(1)) { + continue; + } + + if (TA->Config->skipSeed(&Arg)) { + continue; + } + + SeedState[&Arg].insert(SeedCompressor.getOrInsert(&Arg)); + } + + for (const auto *Glob : TA->UsedGlobals->GlobsPerSCC[CurrSCC]) { + if (TA->Config->skipSeed(Glob)) { + continue; + } + + SeedState[Glob].insert(SeedCompressor.getOrInsert(Glob)); + } + + if (Fun->isVarArg()) { + if (const auto *VA = getVaListTagOrNull(*Fun)) { + SeedState[VA].insert(SeedCompressor.getOrInsert(VA)); + } + } +} + +void monoifds::TaintAnalysis::LocalAnalysis::generateTaintsAtCall( + n_t CS, f_t Callee, llvm::function_ref GenFact) { + forallGeneratedFacts(*TA->Config, llvm::cast(CS), Callee, + [this, CS, GenFact](const auto *Fact) { + auto Aliases = AC.getAliasSet(Fact, CS); + llvm::for_each(Aliases, GenFact); + }); +} + +void monoifds::TaintAnalysis::LocalAnalysis::leakTaintsAtCall( + n_t CS, f_t Callee, llvm::function_ref LeakFact) { + forallLeakedFacts(*TA->Config, llvm::cast(CS), Callee, + LeakFact); +} diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIfds.cppm b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIfds.cppm new file mode 100644 index 0000000000..ee5ab735e0 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIfds.cppm @@ -0,0 +1,8 @@ +module; +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" + +export module phasar.llvm.dataflow.monoifds; + +export namespace psr::monoifds { +using psr::monoifds::TaintAnalysis; +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp index dedd2e2e43..56bddbbf98 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp @@ -1 +1,10 @@ #include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" + +#include 
"gtest/gtest.h" + +// TODO + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} From d8b74b2f406c8311466ddd6d284f9b13edcb787c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 30 Mar 2026 17:37:18 +0200 Subject: [PATCH 04/29] Integrating MonoIFDSTaintAnalysis into phasar-cli + make double_free01_c_dbg.ll pass (2nd test does not pass, though!!) --- include/phasar/ControlFlow/CGSCCs.h | 104 +++++++++++ .../DataFlow/MonoIfds/MonoIFDSProblem.h | 74 ++++---- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 59 +++--- .../phasar/DataFlow/MonoIfds/RPOWorkList.h | 2 +- .../ControlFlow/FunctionCompressor.h | 20 ++ .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 24 ++- .../PhasarLLVM/Utils/DataFlowAnalysisType.def | 1 + include/phasar/PhasarLLVM/Utils/UsedGlobals.h | 23 +++ include/phasar/Utils/BitSet.h | 4 + .../{FunctionCompressor.h => FunctionId.h} | 2 - include/phasar/Utils/HashUtils.h | 14 ++ include/phasar/Utils/SCCGeneric.h | 7 +- include/phasar/Utils/TypedVector.h | 4 + .../ControlFlow/FunctionCompressor.cpp | 36 ++++ .../Problems/MonoIFDSTaintAnalysis.cpp | 16 +- lib/PhasarLLVM/Utils/UsedGlobals.cpp | 175 ++++++++++++++++++ ...{FunctionCompressor.cpp => FunctionId.cpp} | 2 +- .../Controller/AnalysisController.cpp | 3 + .../Controller/AnalysisControllerInternal.h | 45 ++--- .../AnalysisControllerXMonoIFDSTaint.cpp | 57 ++++++ .../DataFlow/IfdsIde/CMakeLists.txt | 1 - .../DataFlow/MonoIfds/CMakeLists.txt | 1 + .../DataFlow/MonoIfds/Problems/CMakeLists.txt | 5 + .../Problems/MonoIFDSTaintAnalysisTest.cpp} | 0 24 files changed, 572 insertions(+), 107 deletions(-) create mode 100644 include/phasar/ControlFlow/CGSCCs.h create mode 100644 include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h create mode 100644 include/phasar/PhasarLLVM/Utils/UsedGlobals.h rename include/phasar/Utils/{FunctionCompressor.h => FunctionId.h} (89%) create mode 100644 lib/PhasarLLVM/ControlFlow/FunctionCompressor.cpp create mode 
100644 lib/PhasarLLVM/Utils/UsedGlobals.cpp rename lib/Utils/{FunctionCompressor.cpp => FunctionId.cpp} (71%) create mode 100644 tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp create mode 100644 unittests/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt create mode 100644 unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/CMakeLists.txt rename unittests/PhasarLLVM/DataFlow/{IfdsIde/MonoIFDSSolverTest.cpp => MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp} (100%) diff --git a/include/phasar/ControlFlow/CGSCCs.h b/include/phasar/ControlFlow/CGSCCs.h new file mode 100644 index 0000000000..e588868228 --- /dev/null +++ b/include/phasar/ControlFlow/CGSCCs.h @@ -0,0 +1,104 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/CFG.h" +#include "phasar/ControlFlow/CallGraph.h" +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionId.h" +#include "phasar/Utils/GraphTraits.h" +#include "phasar/Utils/IotaIterator.h" +#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/Utilities.h" + +#include +#include + +#include + +namespace psr { + +template +struct CompressedRevCG { + const CallGraph *CG; + const Compressor *Functions; + [[no_unique_address]] FunctionOfFn FunctionOf; +}; + +template +struct GraphTraits> { + using graph_type = CompressedRevCG; + using value_type = F; + using vertex_t = FunctionId; + using edge_t = vertex_t; + static constexpr vertex_t Invalid = + vertex_t(std::numeric_limits>::max()); + + static auto transformer(const graph_type &G) { + return [&G](ByConstRef CS) { + return 
G.Functions->getOrNull(G.FunctionOf(CS)).value_or(Invalid); + }; + } + + static constexpr auto ValidId = [](vertex_t Vtx) { return Vtx != Invalid; }; + + static auto outEdges(const graph_type &G, vertex_t Vtx) { + return llvm::make_filter_range( + llvm::map_range(G.CG->getCallersOf((*G.Functions)[Vtx]), + transformer(G)), + ValidId); + } + static size_t outDegree(const graph_type &G, vertex_t Vtx) { + return G.CG->getCallersOf((*G.Functions)[Vtx]).size(); + } + static auto vertices(const graph_type &G) noexcept { + return psr::iota(G.Functions->size()); + } + + static size_t size(const graph_type &G) noexcept { + return G.Functions->size(); + } + + static vertex_t target(edge_t Edge) noexcept { return Edge; } +}; + +namespace detail { +constexpr auto getCFGFunctionOf(const CFG auto &CF) { + return [&CF](const auto &Inst) { return CF.getFunctionOf(Inst); }; +} +} // namespace detail + +template +SCCHolder +computeCGSCCs(const psr::CallGraph &CG, const CFGOf auto &CF, + const Compressor &Functions) { + return computeSCCs(CompressedRevCG{ + .CG = &CG, + .Functions = &Functions, + .FunctionOf = detail::getCFGFunctionOf(CF), + }); +} + +template +SCCDependencyGraph +computeCGSCCCallers(const psr::CallGraph &CG, const CFGOf auto &CF, + const Compressor &Functions, + const SCCHolder &SCCs) { + return computeSCCDependencies( + CompressedRevCG{ + .CG = &CG, + .Functions = &Functions, + .FunctionOf = detail::getCFGFunctionOf(CF), + }, + SCCs); +} + +} // namespace psr diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 9e2aa51af8..3b56afd171 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -12,7 +12,7 @@ #include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" #include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/Compressor.h" -#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/FunctionId.h" #include 
"phasar/Utils/SCCGeneric.h" #include "phasar/Utils/TypeTraits.h" @@ -24,50 +24,46 @@ namespace psr::monoifds { template concept MonoIFDSAnalysisDomain = IsAnalysisDomain; -template -concept LocalMonoIFDSProblem = requires( - T &Problem, - DataFlowEnvironment &InOut, - typename T::ProblemAnalysisDomain::n_t Inst, - const typename T::ProblemAnalysisDomain::n_t &Fact, - const typename T::ProblemAnalysisDomain::f_t &Fun, - Compressor - &SeedCompressor) { - Problem.normalFlow(InOut, Inst); - Problem.callToRetFlow(InOut, Inst); - { - Problem.returnFlow(Inst, Fact) - } -> psr::is_iterable_over_v; +template +concept LocalMonoIFDSProblem = + requires(T &Problem, DataFlowEnvironment &InOut, + typename Dom::n_t Inst, const typename Dom::n_t &Fact, + const typename Dom::f_t &Fun, + Compressor &SeedCompressor) { + Problem.normalFlow(InOut, Inst); + Problem.callToRetFlow(InOut, Inst); + { + Problem.returnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; - { - Problem.invReturnFlow(Inst, Fact) - } -> psr::is_iterable_over_v; + { + Problem.invReturnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; - { - Problem.getZeroValue() - } -> std::convertible_to; + { Problem.getZeroValue() } -> std::convertible_to; - Problem.initialSeeds(InOut, SeedCompressor, Fun); + Problem.initialSeeds(InOut, SeedCompressor, Fun); - Problem.generateTaintsAtCall( - Inst, Fun, [](const typename T::ProblemAnalysisDomain::d_t & GenFact) {}); + Problem.generateFactsAtCall(Inst, Fun, + [](const typename Dom::d_t & GenFact) {}); - Problem.generateTaints( - Inst, [](const typename T::ProblemAnalysisDomain::d_t & GenFact) {}); - Problem.leakTaintsAtCall( - Inst, Fun, - [](const typename T::ProblemAnalysisDomain::d_t & LeakFact) {}); - Problem.leakTaints( - Inst, [](const typename T::ProblemAnalysisDomain::d_t & LeakFact) {}); - Problem.onResult(Inst, Fact); -}; + Problem.generateFacts(Inst, [](const typename Dom::d_t & GenFact) {}); + Problem.requestedEffectAtCall(Inst, Fun, + [](const typename Dom::d_t & LeakFact) 
{}); + Problem.requestedEffect(Inst, [](const typename Dom::d_t & LeakFact) {}); + Problem.onResult(Inst, Fact); + }; template -concept MonoIFDSProblem = requires(T &Problem, SCCId CurrSCC, - std::pmr::memory_resource *MRes) { - typename T::ProblemAnalysisDomain; - requires MonoIFDSAnalysisDomain; +concept MonoIFDSProblem = + requires(T &Problem, SCCId CurrSCC, + std::pmr::memory_resource *MRes, llvm::raw_ostream &OS) { + typename T::ProblemAnalysisDomain; + requires MonoIFDSAnalysisDomain; - { Problem.localAnalysis(CurrSCC, MRes) } -> LocalMonoIFDSProblem; -}; + { + Problem.localAnalysis(CurrSCC, MRes) + } -> LocalMonoIFDSProblem; + Problem.emitTextReport(OS); + }; } // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 548352797f..28b36be53e 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -18,10 +18,13 @@ #include "phasar/DataFlow/MonoIfds/RPOWorkList.h" #include "phasar/Utils/ByRef.h" #include "phasar/Utils/Compressor.h" -#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/FunctionId.h" +#include "phasar/Utils/HashUtils.h" #include "phasar/Utils/Lazy.h" +#include "phasar/Utils/MapUtils.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/Nullable.h" +#include "phasar/Utils/RepeatIterator.h" #include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/TypedVector.h" @@ -57,7 +60,7 @@ template class MonoIFDSSolver { return *this; } - MonoIFDSSolver setFunctionCompressor( + MonoIFDSSolver &setFunctionCompressor( const Compressor *Functions) & noexcept { this->Functions = Functions; return *this; @@ -65,19 +68,29 @@ template class MonoIFDSSolver { void solve(); + void dumpResults(llvm::raw_ostream &OS) const { + OS << "No Raw-Results Dump available yet!\n"; + } + + void emitTextReport(llvm::raw_ostream &OS) const { + Problem->emitTextReport(OS); + } + private: // NOTE: 
Used the node_hash_map from // [parallel-hash-map](https://github.com/greg7mdp/parallel-hashmap) here // for the paper-eval! template - using node_hash_map = std::pmr::unordered_map; + using node_hash_map = + std::pmr::unordered_map>; struct FunctionSummary { Compressor SourceFactIds; DataFlowEnvironment EndSummary; - [[clang::require_explicit_initialization]] node_hash_map< - std::pair, SourceFactSet> LeakIf; + node_hash_map, SourceFactSet> LeakIf; + + FunctionSummary(std::pmr::memory_resource *MRes) : LeakIf(MRes) {} }; struct IntermediateState { @@ -161,7 +174,7 @@ template class MonoIFDSSolver { llvm::ArrayRef CurrFuns) { const size_t SCCSize = CurrFuns.size(); const bool InRecursion = SCCSize > 1; - IntermediateState IState(&PoolRes, CurrSCC, InRecursion); + IntermediateState IState(Problem, &PoolRes, CurrSCC, InRecursion); const auto IterStrategy = Config.IterStrategy; const bool UseTopoFixpointDriver = [=] { @@ -180,7 +193,7 @@ template class MonoIFDSSolver { ControlFlowOrder CFO; if (UseTopoFixpointDriver) { for (const auto &Fun : CurrFuns) { - computeCFGOrder(CFO, *ICF, Fun); + computeCFGOrder(CFO, *ICF, (*Functions)[Fun]); } } @@ -203,13 +216,13 @@ template class MonoIFDSSolver { }; const auto RepropagateInRecursion = [&](auto &Driver) { - rescheduleCalls(IState, Driver, SCCs, CurrSCC, Functions); + rescheduleCalls(IState, Driver); while (!Driver.empty()) { Driver.run( [&](n_t BlockStart) { analyzeBlock(IState, Driver, BlockStart); }); assert(Driver.empty()); - rescheduleCalls(IState, Driver, SCCs, CurrSCC, Functions); + rescheduleCalls(IState, Driver); llvm::errs() << '.'; } @@ -231,7 +244,7 @@ template class MonoIFDSSolver { } } - repropagateLeaks(IState, SCCs, CurrSCC, Functions); + repropagateLeaks(IState, CurrSCC); } void submitInitialSeeds(IntermediateState &IState, auto &Driver, @@ -248,7 +261,7 @@ template class MonoIFDSSolver { auto &SeedState = IState.PathEdges[SP]; SeedState[Zero].insert(SourceFactId(0)); - 
IState.LocalProblem.initialSeeds(SeedState, SeedCompressor, Fun, CurrSCC); + IState.LocalProblem.initialSeeds(SeedState, SeedCompressor, Fun); Driver.push(SP); } } @@ -262,7 +275,7 @@ template class MonoIFDSSolver { for (auto FunId : IState.HasNewSummary) { IState.HasNewLeaks.erase(FunId); - const auto &Fun = Functions[FunId]; + const auto &Fun = (*Functions)[FunId]; for (const auto &CS : getOrDefault(IState.Incoming, Fun)) { const auto &CSFun = ICF->getFunctionOf(CS); @@ -283,7 +296,7 @@ template class MonoIFDSSolver { NewLeaksWL.swap(IState.HasNewLeaks); for (auto FunId : NewLeaksWL) { - handleLeaksForFun(IState, SCCs, CurrSCC, Functions, FunId); + handleLeaksForFun(IState, CurrSCC, FunId); } NewLeaksWL.clear(); } @@ -338,7 +351,7 @@ template class MonoIFDSSolver { ByConstRef BlockStart, DataFlowEnvironment LocalState) { - auto CurrFunId = Functions.get(BlockStart->getFunction()); + auto CurrFunId = Functions->get(BlockStart->getFunction()); // const bool EnableAggressiveLoopPriorization = // Config.EnableAggressiveLoopPriorization; @@ -514,7 +527,7 @@ template class MonoIFDSSolver { // Collect all data-flows that need to be propagated. 
Don't update // LocalState in-place - auto CalleeId = Functions.get(CalleeFun); + auto CalleeId = Functions->get(CalleeFun); applySummary(IState, std::as_const(LocalState), CollectedSummary, CalleeId, Inst, CurrFunId); } @@ -578,23 +591,23 @@ template class MonoIFDSSolver { CanCTR = false; } - auto CalleeId = Functions.get(CalleeFun); + auto CalleeId = Functions->get(CalleeFun); auto CalleeSCC = SCCs.SCCOfNode[CalleeId]; if (CalleeSCC == CurrSCC) { MayRecurse = true; IState.Incoming[CalleeFun].insert(Inst); } - IState.LocalProblem.leakTaintsAtCall( + IState.LocalProblem.requestedEffectAtCall( Inst, CalleeFun, [&](ByConstRef LeakFact) { - if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { + if (const auto *LeakSrc = psr::getOrNull(LocalState, LeakFact)) { reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); } }); // Generate taints from zero: - IState.LocalProblem.generateTaintsAtCall( + IState.LocalProblem.generateFactsAtCall( Inst, CalleeFun, [&](ByConstRef GenFact) { // Note: Assume, this gets called for all relevant aliases as well LocalState[GenFact].insert(SourceFactId(0)); @@ -610,14 +623,14 @@ template class MonoIFDSSolver { void handleSourceSinkConfig(IntermediateState &IState, DataFlowEnvironment &LocalState, FunctionId CurrFunId, n_t Inst) { - IState.LocalProblem.leaksTaint(Inst, [&](const auto &LeakFact) { + IState.LocalProblem.requestedEffect(Inst, [&](const auto &LeakFact) { if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); } }); // Generate taints from zero: - IState.LocalProblem.generateTaints(Inst, [&](const auto &GenFact) { + IState.LocalProblem.generateFacts(Inst, [&](const auto &GenFact) { LocalState[GenFact].insert(SourceFactId(0)); }); } @@ -720,9 +733,11 @@ template class MonoIFDSSolver { template void MonoIFDSSolver::solve() { // Step 1: Check for pre-analysis results: If any of them is null, create them + // TODO: !!! 
// Step 2: Pre-allocate buffers - Summaries.resize(Functions->size()); + Summaries.reserve(Functions->size()); + Summaries.append(psr::repeat(&PoolRes, Functions->size())); // Step 3: Analyze each CG-SCC in isolation diff --git a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h index 7fd06988d1..043b7da7aa 100644 --- a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h +++ b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h @@ -39,7 +39,7 @@ template class TopoFixpointDriver { return std::nullopt; } - auto IId = ControlFlowOrder::CFGOrderId(Max); + auto IId = typename ControlFlowOrder::CFGOrderId(Max); Max = WorkList.find_prev(Max); return CFO.Order[IId]; } diff --git a/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h b/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h new file mode 100644 index 0000000000..6d2c488885 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h @@ -0,0 +1,20 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraph.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionId.h" + +namespace psr { +Compressor +compressFunctions(const LLVMBasedCallGraph &CG, + llvm::ArrayRef EntryPoints); +} // namespace psr \ No newline at end of file diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index da306094e2..6462cc261b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -18,7 +18,6 @@ #include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" #include "phasar/PhasarLLVM/Utils/LLVMAnalysisPrinter.h" #include "phasar/Utils/Compressor.h" -#include "phasar/Utils/FunctionCompressor.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/NullAnalysisPrinter.h" #include "phasar/Utils/SCCGeneric.h" @@ -67,14 +66,14 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { void initialSeeds(DataFlowEnvironment &SeedState, Compressor &SeedCompressor, f_t Fun); - void generateTaintsAtCall(n_t CS, f_t Callee, - llvm::function_ref GenFact); - void generateTaints(n_t CS, llvm::function_ref GenFact) { + void generateFactsAtCall(n_t CS, f_t Callee, + llvm::function_ref GenFact); + void generateFacts(n_t CS, llvm::function_ref GenFact) { // XXX: Implement (was not necessary for paper eval) } - void leakTaintsAtCall(n_t CS, f_t Callee, - llvm::function_ref LeakFact); - void leakTaints(n_t CS, llvm::function_ref LeakFact) { + void requestedEffectAtCall(n_t CS, f_t Callee, + llvm::function_ref LeakFact); + void requestedEffect(n_t Inst, llvm::function_ref LeakFact) { // XXX: Implement (was not necessary for paper eval) } void onResult(n_t 
Inst, d_t Fact) { @@ -87,12 +86,21 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { std::pmr::memory_resource *MRes) { return LocalAnalysis{ .TA = this, - .AC = AliasCache(AI, Config->getRegisteredSkipSeedsCallBack(), + .AC = AliasCache(AI, + Config->getRegisteredSkipSeedsCallBack() + ? (llvm::function_ref) + Config->getRegisteredSkipSeedsCallBack() + : llvm::function_ref{}, &UsedGlobals->GlobsPerSCC[CurrSCC], MRes), .CurrSCC = CurrSCC, }; } + void emitTextReport(llvm::raw_ostream &OS) const { + OS << "\n----- Found the following leaks -----\n"; + Printer->onFinalize(OS); + } + // TODO: shouldBeInSummary() private: diff --git a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def index e8d1a71ae2..8c981903a8 100644 --- a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def +++ b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def @@ -15,6 +15,7 @@ DATA_FLOW_ANALYSIS_TYPES(IFDSUninitializedVariables, "ifds-uninit", "Find usages DATA_FLOW_ANALYSIS_TYPES(IFDSConstAnalysis, "ifds-const", "Find variables that are actually mutated through the program") DATA_FLOW_ANALYSIS_TYPES(IFDSTaintAnalysis, "ifds-taint", "Simple, alias-aware taint-analysis. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IFDSCFLEnvTaintAnalysis, "ifds-fieldsens-taint", "Same base analysis as ifds-taint, but uses CFL-Environment-Transformers to achieve field sensitivity. Use with --analysis-config") +DATA_FLOW_ANALYSIS_TYPES(MonoIFDSTaintAnalysis, "monoifds-taint", "Simple, alias-aware taint-analysis as POC for the MonoIFDS solver. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(SparseIFDSTaintAnalysis, "sparse-ifds-taint", "Simple, alias-aware taint-analysis utilizing SparseIFDS. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IDEExtendedTaintAnalysis, "ide-xtaint", "More advanced alias-aware taint analysis that provides limited field-sensitivity. 
Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IFDSTypeAnalysis, "ifds-type", "Simple type analysis") diff --git a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h new file mode 100644 index 0000000000..ac6b7fa4aa --- /dev/null +++ b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h @@ -0,0 +1,23 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/Utils/FunctionId.h" +#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/UsedGlobalsHolder.h" + +namespace psr { +UsedGlobalsHolder computeUsedGlobals( + const LLVMProjectIRDB &IRDB, + const Compressor &Functions, + const SCCHolder &SCCs, + const SCCDependencyGraph &Callers); +} // namespace psr diff --git a/include/phasar/Utils/BitSet.h b/include/phasar/Utils/BitSet.h index 732ff7f6e8..7a254c48cd 100644 --- a/include/phasar/Utils/BitSet.h +++ b/include/phasar/Utils/BitSet.h @@ -37,6 +37,10 @@ namespace psr { template class BitSet { static llvm::ArrayRef getWords(const llvm::BitVector &BV, uintptr_t & /*Store*/) { + if (BV.empty()) { + return {}; + } + return BV.getData(); } static llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, diff --git a/include/phasar/Utils/FunctionCompressor.h b/include/phasar/Utils/FunctionId.h similarity index 89% rename from include/phasar/Utils/FunctionCompressor.h rename to include/phasar/Utils/FunctionId.h index b7b6bab230..a063b1ee48 100644 --- a/include/phasar/Utils/FunctionCompressor.h +++ b/include/phasar/Utils/FunctionId.h @@ -19,7 +19,5 @@ PHASAR_STRONG_TYPEDEF(psr, uint32_t, FunctionId); namespace psr 
{ -using FunctionCompressor = Compressor; - std::string to_string(FunctionId FId); } // namespace psr diff --git a/include/phasar/Utils/HashUtils.h b/include/phasar/Utils/HashUtils.h index 3815d29ff7..20b6ff2b07 100644 --- a/include/phasar/Utils/HashUtils.h +++ b/include/phasar/Utils/HashUtils.h @@ -10,7 +10,10 @@ #ifndef PHASAR_UTILS_HASHUTILS_H #define PHASAR_UTILS_HASHUTILS_H +#include "phasar/Utils/TypeTraits.h" + #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" #include #include @@ -22,6 +25,17 @@ struct PairHash { return llvm::DenseMapInfo>::getHashValue(Pair); } }; + +template struct DefaultHash { + [[nodiscard]] size_t operator()(const T &Value) const noexcept { + if constexpr (is_llvm_hashable_v) { + using llvm::hash_value; + return hash_value(Value); + } else { + return std::hash{}(Value); + } + } +}; } // namespace psr #endif // PHASAR_UTILS_HASHUTILS_H diff --git a/include/phasar/Utils/SCCGeneric.h b/include/phasar/Utils/SCCGeneric.h index d6cf50efe8..28529f6b84 100644 --- a/include/phasar/Utils/SCCGeneric.h +++ b/include/phasar/Utils/SCCGeneric.h @@ -231,9 +231,10 @@ template struct SCCOrder { /// to single nodes. The resulting graph is always a DAG, i.e., it contains no /// cycles template -#if __cplusplus >= 202002L - requires is_const_graph -#endif +// TODO: is_const_graph is too restrictive! 
+// #if __cplusplus >= 202002L +// requires is_const_graph +// #endif SCCDependencyGraph::vertex_t> computeSCCDependencies( const G &Graph, const SCCHolder::vertex_t> &SCCs) { diff --git a/include/phasar/Utils/TypedVector.h b/include/phasar/Utils/TypedVector.h index ef15f3e158..4f74ab53b1 100644 --- a/include/phasar/Utils/TypedVector.h +++ b/include/phasar/Utils/TypedVector.h @@ -95,6 +95,10 @@ class TypedVector { Vec.push_back(std::move(Val)); } + void append(auto &&Range) { + Vec.append(llvm::adl_begin(Range), llvm::adl_end(Range)); + } + void pop_back() { Vec.pop_back(); } [[nodiscard]] ValueT pop_back_val() { return Vec.pop_back_val(); } diff --git a/lib/PhasarLLVM/ControlFlow/FunctionCompressor.cpp b/lib/PhasarLLVM/ControlFlow/FunctionCompressor.cpp new file mode 100644 index 0000000000..e23e704fe2 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/FunctionCompressor.cpp @@ -0,0 +1,36 @@ +#include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" + +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" + +using namespace psr; + +Compressor +psr::compressFunctions(const LLVMBasedCallGraph &CG, + llvm::ArrayRef EntryPoints) { + Compressor Functions; + Functions.reserve(CG.getNumVertexFunctions()); + llvm::SmallVector WL; + WL.append(EntryPoints.begin(), EntryPoints.end()); + + while (!WL.empty()) { + const auto *Fn = WL.pop_back_val(); + + auto Inserted = Functions.insert(Fn).second; + if (!Inserted) { + continue; + } + + for (const auto &I : llvm::instructions(Fn)) { + const auto *CS = llvm::dyn_cast(&I); + if (!CS) { + continue; + } + + auto Callees = CG.getCalleesOfCallAt(CS); + WL.append(Callees.begin(), Callees.end()); + } + } + + return Functions; +} diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp index 05e30c5aa2..08224daa6b 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp +++ 
b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -6,17 +6,15 @@ #include "phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Compressor.h" -#include "phasar/Utils/FunctionCompressor.h" #include "phasar/Utils/MapUtils.h" #include "phasar/Utils/SCCGeneric.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/InstrTypes.h" - -#include -#include -#include -#include -#include +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" using namespace psr; using namespace psr::monoifds; @@ -277,7 +275,7 @@ void monoifds::TaintAnalysis::LocalAnalysis::initialSeeds( } } -void monoifds::TaintAnalysis::LocalAnalysis::generateTaintsAtCall( +void monoifds::TaintAnalysis::LocalAnalysis::generateFactsAtCall( n_t CS, f_t Callee, llvm::function_ref GenFact) { forallGeneratedFacts(*TA->Config, llvm::cast(CS), Callee, [this, CS, GenFact](const auto *Fact) { @@ -286,7 +284,7 @@ void monoifds::TaintAnalysis::LocalAnalysis::generateTaintsAtCall( }); } -void monoifds::TaintAnalysis::LocalAnalysis::leakTaintsAtCall( +void monoifds::TaintAnalysis::LocalAnalysis::requestedEffectAtCall( n_t CS, f_t Callee, llvm::function_ref LeakFact) { forallLeakedFacts(*TA->Config, llvm::cast(CS), Callee, LeakFact); diff --git a/lib/PhasarLLVM/Utils/UsedGlobals.cpp b/lib/PhasarLLVM/Utils/UsedGlobals.cpp new file mode 100644 index 0000000000..ca7f766eb6 --- /dev/null +++ b/lib/PhasarLLVM/Utils/UsedGlobals.cpp @@ -0,0 +1,175 @@ +#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" + +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/FunctionId.h" +#include "phasar/Utils/SCCGeneric.h" + +#include "llvm/IR/Module.h" + +#include +#include +#include +#include + +using namespace psr; + +static bool isEffectivelyConstant(const llvm::GlobalVariable *Glob) { + auto Name = 
Glob->getName(); + if (Name.startswith("_ZTV") || Name.startswith("_ZTI")) { + return true; + } + + for (const auto &Use : Glob->uses()) { + const auto *User = Use.getUser(); + if (llvm::isa(User)) { + continue; + } + + if (const auto *MemTrn = llvm::dyn_cast(User)) { + if (MemTrn->getRawSource() != Use.get()) { + return false; + } + + continue; + } + + if (const auto *Call = llvm::dyn_cast(User)) { + if (Use.get() == Call->getCalledOperand()) { + continue; + } + + if (const auto *DestFun = Call->getCalledFunction()) { + if (DestFun->onlyReadsMemory() || + DestFun->onlyAccessesInaccessibleMemory()) { + // llvm::errs() << "[NOTE]: At " << *Call + // << ": Readonly or Inaccessiblememonly\n"; + continue; + } + } + + auto Idx = Use.getOperandNo(); + bool IsReadonlyParam = + Call->paramHasAttr(Idx, llvm::Attribute::NoCapture) && + (Call->paramHasAttr(Idx, llvm::Attribute::ReadOnly) || + Call->paramHasAttr(Idx, llvm::Attribute::ReadNone)); + + // llvm::errs() << "[NOTE]: At " << *Call << ": Param " << Idx + // << " is readonly: " << IsReadonlyParam << '\n'; + + if (!IsReadonlyParam) { + return false; + } + + continue; + } + + // llvm::errs() << "Other: " << *Use << ": Idx=" << Use.getOperandNo() + // << "; User=" << *Use.getUser() << '\n'; + return false; + } + return true; +} + +static llvm::SmallDenseSet +computeEffectivelyConstGlobals(const LLVMProjectIRDB &IRDB) { + llvm::SmallDenseSet Ret; + + for (const auto *Glob : IRDB.getAllGlobals()) { + if (Glob->isConstant()) { + continue; + } + + if (isEffectivelyConstant(Glob)) { + Ret.insert(Glob); + } + } + + return Ret; +} + +static void initializeFun(auto &Globs, const llvm::Function *Fun, + const auto &EffectivelyConstGlobals) { + for (const auto &Inst : llvm::instructions(Fun)) { + if (Inst.isDebugOrPseudoInst()) { + continue; + } + + for (const auto *Op : Inst.operand_values()) { + if (const auto *Glob = llvm::dyn_cast( + Op->stripInBoundsConstantOffsets())) { + + // TODO: ispointerty must be done by the taint 
config! + if (Glob->isConstant() || EffectivelyConstGlobals.contains(Glob) || + !Glob->getValueType()->isPointerTy()) { + continue; + } + + Globs.insert(Glob); + } + } + } +} + +static void +initialize(UsedGlobalsHolder &Ret, + const Compressor &Functions, + const SCCHolder &SCCs, + const auto &EffectivelyConstGlobals) { + for (auto FunId : psr::iota(Functions.size())) { + const auto *Fun = Functions[FunId]; + auto &InitialGlobs = Ret.InitialGlobsPerSCC[SCCs.SCCOfNode[FunId]]; + auto &Globs = Ret.GlobsPerSCC[SCCs.SCCOfNode[FunId]]; + + initializeFun(InitialGlobs, Fun, EffectivelyConstGlobals); + Globs = InitialGlobs; + } +} + +static void +propagateGlobals(UsedGlobalsHolder &Ret, + const SCCDependencyGraph &Callers) { + std::deque> WL; + BitSet> Seen(Callers.ChildrenOfSCC.size()); + + for (auto Leaf : Callers.SCCRoots) { + WL.push_back(Leaf); + Seen.insert(Leaf); + } + + while (!WL.empty()) { + auto CurrSCC = WL.front(); + WL.pop_front(); + + const auto &Globs = Ret.GlobsPerSCC[CurrSCC]; + + for (auto Caller : Callers.ChildrenOfSCC[CurrSCC]) { + auto &CallerGlobs = Ret.GlobsPerSCC[Caller]; + bool Inserted = false; + for (const auto *G : Globs) { + Inserted |= CallerGlobs.insert(G).second; + } + + if (Seen.tryInsert(Caller) || Inserted) { + WL.push_back(Caller); + } + } + } +} + +UsedGlobalsHolder psr::computeUsedGlobals( + const LLVMProjectIRDB &IRDB, + const Compressor &Functions, + const SCCHolder &SCCs, + const SCCDependencyGraph &Callers) { + UsedGlobalsHolder Ret; + Ret.InitialGlobsPerSCC.resize(SCCs.size()); + Ret.GlobsPerSCC.resize(SCCs.size()); + + auto EffectivelyConstGlobals = computeEffectivelyConstGlobals(IRDB); + + initialize(Ret, Functions, SCCs, EffectivelyConstGlobals); + propagateGlobals(Ret, Callers); + + return Ret; +} diff --git a/lib/Utils/FunctionCompressor.cpp b/lib/Utils/FunctionId.cpp similarity index 71% rename from lib/Utils/FunctionCompressor.cpp rename to lib/Utils/FunctionId.cpp index b7c7d4719d..09109f937c 100644 --- 
a/lib/Utils/FunctionCompressor.cpp +++ b/lib/Utils/FunctionId.cpp @@ -1,4 +1,4 @@ -#include "phasar/Utils/FunctionCompressor.h" +#include "phasar/Utils/FunctionId.h" #include diff --git a/tools/phasar-cli/Controller/AnalysisController.cpp b/tools/phasar-cli/Controller/AnalysisController.cpp index 544142e8a6..1f6feb5657 100644 --- a/tools/phasar-cli/Controller/AnalysisController.cpp +++ b/tools/phasar-cli/Controller/AnalysisController.cpp @@ -131,6 +131,9 @@ static void executeWholeProgram(AnalysisController &Data) { case DataFlowAnalysisType::IFDSCFLEnvTaintAnalysis: executeIFDSCFLEnvTaint(Data); continue; + case DataFlowAnalysisType::MonoIFDSTaintAnalysis: + executeMonoIFDSTaint(Data); + continue; case DataFlowAnalysisType::SparseIFDSTaintAnalysis: executeSparseIFDSTaint(Data); continue; diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternal.h b/tools/phasar-cli/Controller/AnalysisControllerInternal.h index 42547dbff5..539b7d1439 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternal.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternal.h @@ -23,6 +23,8 @@ #include "AnalysisController.h" +#include + namespace psr { template class IDESolver; } // namespace psr @@ -33,6 +35,7 @@ LLVM_LIBRARY_VISIBILITY void executeIFDSUninitVar(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSConst(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSTaint(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSCFLEnvTaint(AnalysisController &Data); +LLVM_LIBRARY_VISIBILITY void executeMonoIFDSTaint(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSType(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSSolverTest(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void @@ -62,7 +65,9 @@ static constexpr unsigned K = 3; makeTaintConfig(AnalysisController &Data); template -static void statsEmitter(llvm::raw_ostream & /*OS*/, const T & /*Solver*/) {} +static void 
statsEmitter(llvm::raw_ostream &OS, const T & /*Solver*/) { + OS << "No solver-stats available!\n"; +} template static void statsEmitter(llvm::raw_ostream &OS, const IDESolver &Solver); @@ -71,41 +76,39 @@ static void emitRequestedDataFlowResults(AnalysisController &Data, T &Solver) { auto EmitterOptions = Data.EmitterOptions; const auto &ResultDirectory = Data.ResultDirectory; - if (EmitterOptions & AnalysisControllerEmitterOptions::EmitTextReport) { + const auto PrintResult = [&ResultDirectory](llvm::StringRef Suffix, + auto WithStream) { if (!ResultDirectory.empty()) { if (auto OFS = - openFileStream(ResultDirectory.string() + "/psr-report.txt")) { - Solver.emitTextReport(*OFS); + openFileStream(llvm::Twine(ResultDirectory.string()) + Suffix)) { + WithStream(*OFS); } } else { - Solver.emitTextReport(llvm::outs()); + WithStream(llvm::outs()); } + }; + + if (EmitterOptions & AnalysisControllerEmitterOptions::EmitTextReport) { + PrintResult("/psr-report.txt", + [&](auto &OS) { Solver.emitTextReport(OS); }); } if (EmitterOptions & AnalysisControllerEmitterOptions::EmitGraphicalReport) { - if (!ResultDirectory.empty()) { - if (auto OFS = - openFileStream(ResultDirectory.string() + "/psr-report.html")) { - Solver.emitGraphicalReport(*OFS); + PrintResult("/psr-report.html", [&](auto &OS) { + if constexpr (requires() { Solver.emitGraphicalReport(llvm::outs()); }) { + Solver.emitGraphicalReport(OS); + } else { + OS << "Graphical report not available!\n"; } - } else { - Solver.emitGraphicalReport(llvm::outs()); - } + }); } if (EmitterOptions & AnalysisControllerEmitterOptions::EmitRawResults) { - if (!ResultDirectory.empty()) { - if (auto OFS = openFileStream(ResultDirectory.string() + - "/psr-raw-results.txt")) { - Solver.dumpResults(*OFS); - } - } else { - Solver.dumpResults(llvm::outs()); - } + PrintResult("/psr-raw-results.txt", + [&](auto &OS) { Solver.dumpResults(OS); }); } if (EmitterOptions & AnalysisControllerEmitterOptions::EmitESGAsDot) { llvm::outs() << 
"Front-end support for 'EmitESGAsDot' to be implemented\n"; } if (EmitterOptions & AnalysisControllerEmitterOptions::EmitStatisticsAsText) { - statsEmitter(llvm::outs(), Solver); } } diff --git a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp new file mode 100644 index 0000000000..f988501a29 --- /dev/null +++ b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp @@ -0,0 +1,57 @@ +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/CGSCCs.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSSolver.h" +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" +#include "phasar/PhasarLLVM/Pointer/CachedLLVMAliasIterator.h" +#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasIterator.h" +#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" + +#include "AnalysisControllerInternalIDE.h" + +using namespace psr; + +void controller::executeMonoIFDSTaint(AnalysisController &Data) { + auto Config = makeTaintConfig(Data); + + auto &IRDB = Data.HA->getProjectIRDB(); + auto &ICF = Data.HA->getICFG(); + + const auto &CG = ICF.getCallGraph(); + auto FC = + compressFunctions(CG, psr::getEntryFunctions(IRDB, Data.EntryPoints)); + + auto SCCs = computeCGSCCs(CG, ICF, FC); + auto SCCCallers = computeCGSCCCallers(CG, ICF, FC, SCCs); + + auto UsedGlobals = computeUsedGlobals(IRDB, FC, SCCs, SCCCallers); + + FilteredLLVMAliasIterator FAI(&Data.HA->getAliasInfo()); + CachedLLVMAliasIterator 
CAI(&FAI); + monoifds::TaintAnalysis TA(&Config, &UsedGlobals, &CAI); + monoifds::MonoIFDSSolver Solver(&TA, &ICF); + Solver.setCGSCCs(&SCCs).setFunctionCompressor(&FC); + + { + std::optional MeasureTime; + if (Data.EmitterOptions & + AnalysisControllerEmitterOptions::EmitStatisticsAsText) { + MeasureTime.emplace([](auto Elapsed) { + llvm::outs() << "Elapsed: " << hms{Elapsed} << '\n'; + }); + } + + Solver.solve(); + } + + emitRequestedDataFlowResults(Data, Solver); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt index 2f60ae9126..b5de60ba49 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt @@ -8,7 +8,6 @@ set(IfdsIdeSources SparseIDESolverTest.cpp IterativeIDESolverTest.cpp CFLFieldSensTest.cpp - MonoIFDSSolverTest.cpp ) foreach(TEST_SRC ${IfdsIdeSources}) diff --git a/unittests/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt new file mode 100644 index 0000000000..37954f72e6 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/MonoIfds/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Problems) diff --git a/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/CMakeLists.txt new file mode 100644 index 0000000000..8b6c73aa3b --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/CMakeLists.txt @@ -0,0 +1,5 @@ +file(GLOB_RECURSE MONOIFDS_TEST_SRC *.cpp) + +foreach(TEST_SRC ${MONOIFDS_TEST_SRC}) + add_phasar_unittest(${TEST_SRC}) +endforeach(TEST_SRC) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp b/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp similarity index 100% rename from unittests/PhasarLLVM/DataFlow/IfdsIde/MonoIFDSSolverTest.cpp rename to unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp From 
470a98814b8b8904ba4b1a24c5623b320faa32c3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 31 Mar 2026 10:01:06 +0200 Subject: [PATCH 05/29] Fix iteration order + make tests pass --- .clang-tidy | 1 + include/phasar/ControlFlow/CGSCCs.h | 174 +++++++++++------- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 21 +++ 3 files changed, 133 insertions(+), 63 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index d0cbb771d6..a3209d4c13 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -20,6 +20,7 @@ Checks: '-*, -readability-redundant-member-init, -readability-use-anyofallof, -readability-avoid-return-with-void-value, + -readability-use-std-min-max, cppcoreguidelines-*, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, diff --git a/include/phasar/ControlFlow/CGSCCs.h b/include/phasar/ControlFlow/CGSCCs.h index e588868228..f484200094 100644 --- a/include/phasar/ControlFlow/CGSCCs.h +++ b/include/phasar/ControlFlow/CGSCCs.h @@ -11,80 +11,109 @@ #include "phasar/ControlFlow/CFG.h" #include "phasar/ControlFlow/CallGraph.h" +#include "phasar/Utils/BitSet.h" #include "phasar/Utils/ByRef.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" #include "phasar/Utils/GraphTraits.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/SCCGeneric.h" -#include "phasar/Utils/Utilities.h" #include #include -#include - namespace psr { -template -struct CompressedRevCG { - const CallGraph *CG; - const Compressor *Functions; - [[no_unique_address]] FunctionOfFn FunctionOf; -}; - -template -struct GraphTraits> { - using graph_type = CompressedRevCG; - using value_type = F; - using vertex_t = FunctionId; - using edge_t = vertex_t; - static constexpr vertex_t Invalid = - vertex_t(std::numeric_limits>::max()); - - static auto transformer(const graph_type &G) { - return [&G](ByConstRef CS) { - return G.Functions->getOrNull(G.FunctionOf(CS)).value_or(Invalid); - }; - } +// TODO: Use SCCGeneric
algorithms here! - static constexpr auto ValidId = [](vertex_t Vtx) { return Vtx != Invalid; }; +template +SCCHolder +computeCGSCCs(const psr::CallGraph &CG, const CFGOf auto &CF, + const Compressor &Functions) { - static auto outEdges(const graph_type &G, vertex_t Vtx) { - return llvm::make_filter_range( - llvm::map_range(G.CG->getCallersOf((*G.Functions)[Vtx]), - transformer(G)), - ValidId); - } - static size_t outDegree(const graph_type &G, vertex_t Vtx) { - return G.CG->getCallersOf((*G.Functions)[Vtx]).size(); - } - static auto vertices(const graph_type &G) noexcept { - return psr::iota(G.Functions->size()); - } + SCCHolder Ret{}; - static size_t size(const graph_type &G) noexcept { - return G.Functions->size(); - } + auto NumFuns = Functions.size(); - static vertex_t target(edge_t Edge) noexcept { return Edge; } -}; + if (!NumFuns) { + return Ret; + } -namespace detail { -constexpr auto getCFGFunctionOf(const CFG auto &CF) { - return [&CF](const auto &Inst) { return CF.getFunctionOf(Inst); }; -} -} // namespace detail + Ret.SCCOfNode.resize(NumFuns); + + llvm::SmallVector Disc(NumFuns, UINT32_MAX); + llvm::SmallVector Low(NumFuns, UINT32_MAX); + BitSet OnStack(NumFuns); + BitSet Seen(NumFuns); + + llvm::SmallVector Stack; + uint32_t Time = 0; + + constexpr auto SetMin = [](uint32_t &InOut, uint32_t Other) { + if (Other < InOut) { + InOut = Other; + } + }; + + const auto Dfs = [&](auto &&Dfs, FunctionId CurrNode) -> void { + auto CurrTime = Time++; + Disc[size_t(CurrNode)] = CurrTime; + Low[size_t(CurrNode)] = CurrTime; + Stack.push_back(CurrNode); + OnStack.insert(CurrNode); + + const auto &CurrFun = Functions[CurrNode]; + for (const auto &Inst : CF.getAllInstructionsOf(CurrFun)) { + if (!CF.isCallSite(Inst)) { + continue; + } + + for (const auto &Succ : CG.getCalleesOfCallAt(Inst)) { + auto SuccNode = Functions.get(Succ); + if (Disc[size_t(SuccNode)] == UINT32_MAX) { + // Tree-edge: Not seen yet --> recurse + + Dfs(Dfs, SuccNode); + 
SetMin(Low[size_t(CurrNode)], Low[size_t(SuccNode)]); + } else if (OnStack.contains(SuccNode)) { + // Back-edge --> circle! + + SetMin(Low[size_t(CurrNode)], Disc[size_t(SuccNode)]); + } + } + } + + if (Low[size_t(CurrNode)] == Disc[size_t(CurrNode)]) { + // Found SCC + + auto SCCIdx = SCCId(Ret.NodesInSCC.size()); + auto &FunsInSCC = Ret.NodesInSCC.emplace_back(); + + assert(!Stack.empty()); + + while (Stack.back() != CurrNode) { + auto Fun = Stack.pop_back_val(); + Ret.SCCOfNode[Fun] = SCCIdx; + OnStack.erase(Fun); + Seen.insert(Fun); + FunsInSCC.push_back(Fun); + } + + auto Fun = Stack.pop_back_val(); + Ret.SCCOfNode[Fun] = SCCIdx; + OnStack.erase(Fun); + Seen.insert(Fun); + FunsInSCC.push_back(Fun); + } + }; + + for (auto FunId : iota(NumFuns)) { + if (!Seen.contains(FunId)) { + Dfs(Dfs, FunId); + } + } -template -SCCHolder -computeCGSCCs(const psr::CallGraph &CG, const CFGOf auto &CF, - const Compressor &Functions) { - return computeSCCs(CompressedRevCG{ - .CG = &CG, - .Functions = &Functions, - .FunctionOf = detail::getCFGFunctionOf(CF), - }); + return Ret; } template @@ -92,13 +121,32 @@ SCCDependencyGraph computeCGSCCCallers(const psr::CallGraph &CG, const CFGOf auto &CF, const Compressor &Functions, const SCCHolder &SCCs) { - return computeSCCDependencies( - CompressedRevCG{ - .CG = &CG, - .Functions = &Functions, - .FunctionOf = detail::getCFGFunctionOf(CF), - }, - SCCs); + SCCDependencyGraph Ret; + Ret.ChildrenOfSCC.resize(SCCs.size()); + + BitSet> Leaves(SCCs.size(), true); + + for (auto FunId : iota(Functions.size())) { + const auto *Fun = Functions[FunId]; + auto SCC = SCCs.SCCOfNode[FunId]; + + for (const auto &CS : CG.getCallersOf(Fun)) { + const auto &CSFun = CF.getFunctionOf(CS); + if (auto CSFunId = Functions.getOrNull(CSFun)) { + auto CSFunSCC = SCCs.SCCOfNode[*CSFunId]; + if (CSFunSCC != SCC) { + Ret.ChildrenOfSCC[SCC].insert(CSFunSCC); + Leaves.erase(CSFunSCC); + } + } + } + } + + Ret.SCCRoots.reserve(Leaves.size()); + Leaves.foreach ( + 
[&](auto Leaf) { Ret.SCCRoots.push_back(SCCId(Leaf)); }); + + return Ret; } } // namespace psr diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 28b36be53e..10a281d37e 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -21,13 +21,16 @@ #include "phasar/Utils/FunctionId.h" #include "phasar/Utils/HashUtils.h" #include "phasar/Utils/Lazy.h" +#include "phasar/Utils/Logger.h" #include "phasar/Utils/MapUtils.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/Nullable.h" +#include "phasar/Utils/Printer.h" #include "phasar/Utils/RepeatIterator.h" #include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/TypedVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include @@ -45,6 +48,8 @@ template class MonoIFDSSolver { using f_t = typename ProblemT::ProblemAnalysisDomain::f_t; using v_t = typename ProblemT::ProblemAnalysisDomain::v_t; + static constexpr llvm::StringLiteral LogCategory = "MonoIFDSSolver"; + explicit MonoIFDSSolver(ProblemT *Problem, const i_t *ICF, std::pmr::polymorphic_allocator<> Alloc = std::pmr::get_default_resource()) @@ -172,6 +177,10 @@ template class MonoIFDSSolver { void computeFixpointForSCC(SCCId CurrSCC, llvm::ArrayRef CurrFuns) { + + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[computeFixpointForSCC]: " << CurrSCC.Value); + const size_t SCCSize = CurrFuns.size(); const bool InRecursion = SCCSize > 1; IntermediateState IState(Problem, &PoolRes, CurrSCC, InRecursion); @@ -250,6 +259,8 @@ template class MonoIFDSSolver { void submitInitialSeeds(IntermediateState &IState, auto &Driver, Compressor &SeedCompressor, ByConstRef Fun, SCCId CurrSCC) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[submitInitialSeeds]: For fun " << FToString(Fun)); const auto &SPs = ICF->getStartPointsOf(Fun); const auto &Zero = IState.LocalProblem.getZeroValue(); @@ -600,6 +611,9 @@ template 
class MonoIFDSSolver { IState.LocalProblem.requestedEffectAtCall( Inst, CalleeFun, [&](ByConstRef LeakFact) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[handleCallSrcSinksAndMayRecurse]: LeakFact: " + << DToString(LeakFact)); if (const auto *LeakSrc = psr::getOrNull(LocalState, LeakFact)) { reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); @@ -609,6 +623,9 @@ template class MonoIFDSSolver { // Generate taints from zero: IState.LocalProblem.generateFactsAtCall( Inst, CalleeFun, [&](ByConstRef GenFact) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[handleCallSrcSinksAndMayRecurse]: GenFact: " + << DToString(GenFact)); // Note: Assume, this gets called for all relevant aliases as well LocalState[GenFact].insert(SourceFactId(0)); }); @@ -654,6 +671,10 @@ template class MonoIFDSSolver { void reportOrPropagateLeak(IntermediateState &IState, FunctionId CurrFunId, n_t LeakInst, d_t LeakFact, SourceFactSet From) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[reportOrPropagateLeak]: " << DToString(LeakFact) + << " AT " + << NToString(LeakInst)); // The zero fact has always Id 0! 
if (From.tryErase(SourceFactId(0))) { if (Leaks[LeakInst].insert(LeakFact).second) { From f4455fda65d3f3eb073bc597d4bf34416f10bfa3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 31 Mar 2026 10:38:05 +0200 Subject: [PATCH 06/29] Add initializer to MonoIFDSSolver --- include/phasar/ControlFlow/CGSCCs.h | 13 ++-- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 60 +++++++++++++++++-- .../ControlFlow/EntryFunctionUtils.h | 4 ++ .../ControlFlow/EntryFunctionUtils.cpp | 34 +++++++++++ 4 files changed, 99 insertions(+), 12 deletions(-) diff --git a/include/phasar/ControlFlow/CGSCCs.h b/include/phasar/ControlFlow/CGSCCs.h index f484200094..60e1d6ab6c 100644 --- a/include/phasar/ControlFlow/CGSCCs.h +++ b/include/phasar/ControlFlow/CGSCCs.h @@ -12,29 +12,26 @@ #include "phasar/ControlFlow/CFG.h" #include "phasar/ControlFlow/CallGraph.h" #include "phasar/Utils/BitSet.h" -#include "phasar/Utils/ByRef.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" -#include "phasar/Utils/GraphTraits.h" #include "phasar/Utils/IotaIterator.h" #include "phasar/Utils/SCCGeneric.h" -#include -#include - namespace psr { // TODO: Use SCCGeneric algorithms here! -template +// Note: Use forward edges (i.e., cs->callee), such that the SCC-order reflects +// the bottom-up iteration order. 
+template C> + requires InstructionClassifier SCCHolder -computeCGSCCs(const psr::CallGraph &CG, const CFGOf auto &CF, +computeCGSCCs(const psr::CallGraph &CG, const C &CF, const Compressor &Functions) { SCCHolder Ret{}; auto NumFuns = Functions.size(); - if (!NumFuns) { return Ret; } diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 10a281d37e..84845fdc80 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -16,6 +16,7 @@ #include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" #include "phasar/DataFlow/MonoIfds/MonoIFDSProblem.h" #include "phasar/DataFlow/MonoIfds/RPOWorkList.h" +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/Utils/ByRef.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" @@ -28,13 +29,17 @@ #include "phasar/Utils/Printer.h" #include "phasar/Utils/RepeatIterator.h" #include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/TypedVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TypeName.h" #include +#include #include +#include #include #include @@ -98,8 +103,11 @@ template class MonoIFDSSolver { FunctionSummary(std::pmr::memory_resource *MRes) : LeakIf(MRes) {} }; + using LocalAnalysis = decltype(std::declval().localAnalysis( + SCCId(), std::declval())); + struct IntermediateState { - typename ProblemT::LocalAnalysis LocalProblem; + LocalAnalysis LocalProblem; node_hash_map> PathEdges; node_hash_map> Incoming; @@ -175,6 +183,50 @@ template class MonoIFDSSolver { } }; + void initializeFunctions() { + if (SCCs) { + throw std::logic_error("SCCs without FunctionCompressor?"); + } + + if constexpr (requires() { + { + Problem->getEntryPoints() + } -> psr::is_iterable_over_v; + }) { + Functions = std::make_unique>( + compressFunctions(ICF->getCallGraph(), 
Problem->getEntryPoints())); + } else if constexpr (requires() { + { + Problem->getEntryPoints() + } -> psr::is_iterable_over_v; + }) { + Functions = + std::make_unique>(compressFunctions( + ICF->getCallGraph(), + psr::getEntryFunctions(*ICF, Problem->getEntryPoints()))); + } else { + throw std::logic_error("The analysis problem " + + llvm::getTypeName().str() + + " does not provide getEntryPoints(). So, you " + "must set a FunctionCompressor by calling " + "setFunctionCompressor() on the solver!"); + } + } + + void initializeSCCs() { + SCCs = std::make_unique>( + computeCGSCCs(ICF->getCallGraph(), *ICF, *Functions)); + } + + void initialize() { + if (!Functions) { + initializeFunctions(); + } + if (!SCCs) { + initializeSCCs(); + } + } + void computeFixpointForSCC(SCCId CurrSCC, llvm::ArrayRef CurrFuns) { @@ -214,7 +266,7 @@ template class MonoIFDSSolver { for (auto FunId : llvm::reverse(CurrFuns)) { const auto *Fun = (*Functions)[FunId]; submitInitialSeeds(IState, Driver, Summaries[FunId].SourceFactIds, - Fun, CurrSCC); + Fun); } Driver.run([&](n_t BlockStart) { analyzeBlock(IState, Driver, BlockStart); @@ -258,7 +310,7 @@ template class MonoIFDSSolver { void submitInitialSeeds(IntermediateState &IState, auto &Driver, Compressor &SeedCompressor, - ByConstRef Fun, SCCId CurrSCC) { + ByConstRef Fun) { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[submitInitialSeeds]: For fun " << FToString(Fun)); const auto &SPs = ICF->getStartPointsOf(Fun); @@ -754,7 +806,7 @@ template class MonoIFDSSolver { template void MonoIFDSSolver::solve() { // Step 1: Check for pre-analysis results: If any of them is null, create them - // TODO: !!! 
+ initialize(); // Step 2: Pre-allocate buffers Summaries.reserve(Functions->size()); diff --git a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h index 78003dcd9b..a76111c500 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h +++ b/include/phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h @@ -18,10 +18,14 @@ namespace psr { class LLVMProjectIRDB; +class LLVMBasedICFG; [[nodiscard]] std::vector getEntryFunctions(const LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints); +[[nodiscard]] std::vector +getEntryFunctions(const LLVMBasedICFG &ICF, + llvm::ArrayRef EntryPoints); [[nodiscard]] std::vector getEntryFunctionsMut(LLVMProjectIRDB &IRDB, diff --git a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp index 13d41123fd..4c177acae2 100644 --- a/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp +++ b/lib/PhasarLLVM/ControlFlow/EntryFunctionUtils.cpp @@ -1,6 +1,7 @@ #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/PhasarLLVM/ControlFlow/GlobalCtorsDtorsModel.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/Utils/Logger.h" @@ -36,6 +37,39 @@ psr::getEntryFunctions(const LLVMProjectIRDB &IRDB, return UserEntryPointFns; } +// TODO: Reduce code duplication: +[[nodiscard]] std::vector +psr::getEntryFunctions(const LLVMBasedICFG &ICF, + llvm::ArrayRef EntryPoints) { + std::vector UserEntryPointFns; + if (EntryPoints.size() == 1 && EntryPoints.front() == "__ALL__") { + UserEntryPointFns.reserve(ICF.getNumVertexFunctions()); + // Handle the special case in which a user wishes to treat all functions as + // entry points. + for (const auto *Fun : ICF.getAllFunctions()) { + // Only functions with external linkage (or 'main') can be called from the + // outside! 
+ if (!Fun->isDeclaration() && Fun->hasName() && + (Fun->hasExternalLinkage() || Fun->getName() == "main")) { + UserEntryPointFns.push_back(Fun); + } + } + } else { + UserEntryPointFns.reserve(EntryPoints.size()); + for (const auto &EntryPoint : EntryPoints) { + const auto *F = ICF.getFunction(EntryPoint); + if (F == nullptr || F->isDeclaration()) { + PHASAR_LOG_LEVEL(WARNING, + "Could not retrieve function for entry point '" + << EntryPoint << "'"); + continue; + } + UserEntryPointFns.push_back(F); + } + } + return UserEntryPointFns; +} + [[nodiscard]] std::vector psr::getEntryFunctionsMut(LLVMProjectIRDB &IRDB, llvm::ArrayRef EntryPoints) { From 1ff346b2f16d6fddb17071c874a5b20323c5175c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 10 Apr 2026 10:01:57 +0200 Subject: [PATCH 07/29] Add some documentation + add summaryFlow() to MonoIFDS --- .../DataFlow/MonoIfds/MonoIFDSProblem.h | 84 ++++++++++++++++++- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 47 ++++++----- .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 10 ++- .../Problems/MonoIFDSTaintAnalysis.cpp | 3 +- 4 files changed, 118 insertions(+), 26 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 3b56afd171..ed094fc180 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -19,51 +19,133 @@ #include #include +/// \file +/// Defines requirements for an analysis problem that can be solved by the +/// MonoIFDS solver. +/// Since MonoIFDS solves analyses bottom-up, each CG-SCC is analyzed in +/// isolation. The solver will call Problem.localAnalysis(...) once per SCC and +/// use the returned LocalMonoIFDSProblem to drive the solving process. 
+/// Although the solver currently is only single-threaded, you can expect that +/// (also in a multi-threaded future) LocalMonoIFDSProblem instances are not +/// accessed by multiple threads at a time, while different instances may be +/// solved on different threads. + namespace psr::monoifds { +/// \brief Defines requirements for a MonoIFDS-compatible analysis domain. template concept MonoIFDSAnalysisDomain = IsAnalysisDomain; template concept LocalMonoIFDSProblem = requires(T &Problem, DataFlowEnvironment &InOut, + const DataFlowEnvironment &In, typename Dom::n_t Inst, const typename Dom::n_t &Fact, const typename Dom::f_t &Fun, Compressor &SeedCompressor) { + /// Intra-procedural data-flow. Input facts are passed-in as InOut; + /// modifications are performed in-place. + /// + /// Corresponds to the $flow()$ function in the paper. Problem.normalFlow(InOut, Inst); + /// Intra-procedural data-flow at call-sites. Input facts are passed-in as + /// InOut; modifications are performed in-place. Kills facts that may be + /// strongly updated by the callee. Don't use it to *generate* facts. + /// + /// Corresponds to the $callFlow()$ function in the paper. Problem.callToRetFlow(InOut, Inst); + /// Inter-procedural data-flow at exit-statements; Maps callee-facts back + /// to the return-site in the caller. As with normal IFDS, this function + /// will be called for each incoming Fact, that should be mapped back; + /// return-site facts are returned by this function. + /// + /// Corresponds to the $returnVal()$ function in the paper. { Problem.returnFlow(Inst, Fact) } -> psr::is_iterable_over_v; + /// Inter-procedural data-flow at entry-statements; Maps + /// callee-source-facts back to the call-site in the caller. This function + /// will be called for each source Fact; call-site facts are returned by + /// this function. + /// + /// Corresponds to the $passArgs^{-1}()$ function in the paper. 
{ - Problem.invReturnFlow(Inst, Fact) + Problem.invCallFlow(Inst, Fact) } -> psr::is_iterable_over_v; + /// Applies a pre-computed summary of Fun at Inst into InOut, if + /// applicable. + /// + /// Useful for pre-known taint-propagators and declaration-only library + /// functions. + /// + /// \returns True, iff a summary was applied. This will take precedence + /// over a summary that the solver may have computed for Fun! + { + Problem.summaryFlow(In, InOut, Inst, Fun) + } -> std::convertible_to; + + /// The special zero value, aka. $\Lambda$. Always holds. Facts that are + /// generated unconditionally originate from zero. { Problem.getZeroValue() } -> std::convertible_to; + /// Approximates the source-facts that should hold at the entry of Fun. + /// Input the facts in the InOut map as + /// `SeedState[Fact].insert(SeedCompressor.getOrInsert(Fact))` Problem.initialSeeds(InOut, SeedCompressor, Fun); + /// At a call-site Inst calling Fun, invokes the given callback for each + /// fact that should be generated from zero there. + /// + /// Useful for taint sources. Problem.generateFactsAtCall(Inst, Fun, [](const typename Dom::d_t & GenFact) {}); + /// At a non-call-site Inst, invokes the given callback for each + /// fact that should be generated from zero there. + /// + /// Useful for taint sources. Problem.generateFacts(Inst, [](const typename Dom::d_t & GenFact) {}); + + /// Invokes the given callback for each LeakFact for which the solver + /// should call onResult(Inst, LeakFact), if LeakFacts holds at Inst. + /// + /// Useful for taint sinks. Problem.requestedEffectAtCall(Inst, Fun, [](const typename Dom::d_t & LeakFact) {}); + /// Invokes the given callback for each LeakFact for which the solver + /// should call onResult(Inst, LeakFact), if LeakFacts holds at Inst. + /// + /// Useful for taint sinks. 
Problem.requestedEffect(Inst, [](const typename Dom::d_t & LeakFact) {}); + + /// Notifies the problem that a previously requested leak-Fact now is + /// known to hold at Inst. + /// + /// Useful for reporting taint leaks. Problem.onResult(Inst, Fact); }; +/// \brief Defines requirements for an analysis problem that can be solved by +/// the MonoIFDSSolver. template concept MonoIFDSProblem = requires(T &Problem, SCCId CurrSCC, std::pmr::memory_resource *MRes, llvm::raw_ostream &OS) { + /// The analysis domain. Defines the type of data-flow facts, and the IR + /// on which the analysis can be run. typename T::ProblemAnalysisDomain; requires MonoIFDSAnalysisDomain; + /// Create a local analysis for the given SCC. + /// Use the given std::memory_resource to allocate node-based containers, + /// if you have any. { Problem.localAnalysis(CurrSCC, MRes) } -> LocalMonoIFDSProblem; + + /// Pretty-print the analysis results into the given llvm::raw_ostream. Problem.emitTextReport(OS); }; } // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 84845fdc80..79210a0778 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -45,6 +45,9 @@ namespace psr::monoifds { +/// \brief Implements the MonoIFDS algorithm, as presented in "Scaling Bottom-up +/// IFDS Taint Analysis with Optimized Data-flow Encoding" by Schiebel and +/// Bodden. 
template class MonoIFDSSolver { public: using n_t = typename ProblemT::ProblemAnalysisDomain::n_t; @@ -147,7 +150,7 @@ template class MonoIFDSSolver { auto &Ret = Mapping[CalleeSrc]; if (ComputedMappings.tryInsert(CalleeSrc)) { - auto &&CSFacts = LocalProblem.invReturnFlow( + auto &&CSFacts = LocalProblem.invCallFlow( CallInst, CalleeSum.SourceFactIds[CalleeSrc]); for (const auto *Fact : CSFacts) { @@ -308,6 +311,7 @@ template class MonoIFDSSolver { repropagateLeaks(IState, CurrSCC); } + /// Lines 1-3 in Algorithm 4 void submitInitialSeeds(IntermediateState &IState, auto &Driver, Compressor &SeedCompressor, ByConstRef Fun) { @@ -329,6 +333,7 @@ template class MonoIFDSSolver { } } + /// Delayed Line 39 in Algorithm 4 void rescheduleCalls(IntermediateState &IState, auto &Driver) { if (!IState.InRecursion) { return; @@ -353,6 +358,7 @@ template class MonoIFDSSolver { IState.HasNewSummary.clear(); } + /// RepropagateLeaks procedure in Algorithm 2 void repropagateLeaks(IntermediateState &IState, SCCId CurrSCC) { llvm::SmallDenseSet NewLeaksWL; while (!IState.HasNewLeaks.empty()) { @@ -365,6 +371,7 @@ template class MonoIFDSSolver { } } + /// Continuation of RepropagateLeaks procedure in Algorithm 2 void handleLeaksForFun(IntermediateState &IState, SCCId CurrSCC, FunctionId CurrFun) { const auto *Fun = (*Functions)[CurrFun]; @@ -410,15 +417,13 @@ template class MonoIFDSSolver { analyzeBlockImpl(IState, Driver, BlockStart, LocalStateRef); } + /// Procedure AnalyzeBlock (Lines 8-11+14 in Algorithm 4) void analyzeBlockImpl(IntermediateState &IState, auto &Driver, ByConstRef BlockStart, DataFlowEnvironment LocalState) { auto CurrFunId = Functions->get(BlockStart->getFunction()); - // const bool EnableAggressiveLoopPriorization = - // Config.EnableAggressiveLoopPriorization; - Nullable CurrInst = BlockStart; do { @@ -494,19 +499,7 @@ template class MonoIFDSSolver { // Merge if (Inserted || tryMergeStates(SuccBBStateIt->second, LocalState)) { SuccBBStateIt->second.Version++; - 
- // note: HasSingleSucc implies here that UniqueSucc==nullptr - - // TODO: Should we support EnableAggressiveLoopPriorization outside of - // LLVM? It did not show significant performance benefits, though - - // if (EnableAggressiveLoopPriorization && HasSingleSucc && - // Block->getTerminator()->hasMetadata(llvm::LLVMContext::MD_loop)) - // { - // UniqueSucc = Succ; - // } else { Driver.push(Succ); - // } } } @@ -521,6 +514,7 @@ template class MonoIFDSSolver { } while (CurrInst); } + /// Lines 15-20 in Algorithm 4 void analyzeInstruction(IntermediateState &IState, DataFlowEnvironment &LocalState, FunctionId CurrFunId, ByConstRef Inst) { @@ -538,6 +532,8 @@ template class MonoIFDSSolver { IState.LocalProblem.normalFlow(LocalState, Inst); } + /// Procedure AnalyzeExit (Lines 35-38 in Algorithm 4, Line 39 is delayed to + /// rescheduleCalls()) void analyzeExitInst(IntermediateState &IState, DataFlowEnvironment &LocalState, FunctionId CurrFunId, ByConstRef Inst) { @@ -570,14 +566,15 @@ template class MonoIFDSSolver { } } + /// Procedure AnalyzeCall (Lines 21-34 in Algorithm 4) void analyzeCallInst(IntermediateState &IState, DataFlowEnvironment &LocalState, FunctionId CurrFunId, ByConstRef Inst) { const auto &Callees = ICF->getCalleesOfCallAt(Inst); - const auto CSInfo = handleCallSrcSinksAndMayRecurse( - IState, LocalState, Callees, CurrFunId, Inst); + auto CSInfo = handleCallSrcSinksAndMayRecurse(IState, LocalState, Callees, + CurrFunId, Inst); if (CSInfo.MayRecurse) { IState.InRecursion = true; @@ -590,7 +587,15 @@ template class MonoIFDSSolver { // Collect all data-flows that need to be propagated. 
Don't update // LocalState in-place + if (IState.LocalProblem.summaryFlow(std::as_const(LocalState), + CollectedSummary, Inst, CalleeFun)) { + continue; + } + auto CalleeId = Functions->get(CalleeFun); + if (ICF->getStartPointsOf(CalleeFun).empty()) { + CSInfo.CanCTR = false; + } applySummary(IState, std::as_const(LocalState), CollectedSummary, CalleeId, Inst, CurrFunId); } @@ -601,6 +606,7 @@ template class MonoIFDSSolver { mergeStates(LocalState, std::move(CollectedSummary)); } + /// Lines 26-32 in Algorithm 4 void applySummary(IntermediateState &IState, const DataFlowEnvironment &In, DataFlowEnvironment &LocalState, FunctionId CalleeId, @@ -650,10 +656,6 @@ template class MonoIFDSSolver { bool MayRecurse = false; bool CanCTR = !Callees.empty(); for (f_t CalleeFun : Callees) { - if (ICF->getStartPointsOf(CalleeFun).empty()) { - CanCTR = false; - } - auto CalleeId = Functions->get(CalleeFun); auto CalleeSCC = SCCs.SCCOfNode[CalleeId]; if (CalleeSCC == CurrSCC) { @@ -721,6 +723,7 @@ template class MonoIFDSSolver { } } + // PropagateLeaks procedure in Algorithm 4 void reportOrPropagateLeak(IntermediateState &IState, FunctionId CurrFunId, n_t LeakInst, d_t LeakFact, SourceFactSet From) { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index 6462cc261b..e52874ef9e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -27,6 +27,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include +#include namespace psr::monoifds { class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { @@ -57,7 +58,14 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { void normalFlow(DataFlowEnvironment &InOut, n_t Curr); void callToRetFlow(DataFlowEnvironment &InOut, n_t Curr); [[nodiscard]] 
llvm::SmallVector returnFlow(n_t CallSite, d_t Fact); - [[nodiscard]] llvm::SmallVector invReturnFlow(n_t CallSite, d_t Fact); + [[nodiscard]] llvm::SmallVector invCallFlow(n_t CallSite, d_t Fact); + [[nodiscard]] std::false_type + summaryFlow(const DataFlowEnvironment & /*In*/, + DataFlowEnvironment & /*Out*/, n_t /*Curr*/, + f_t /*Callee*/) { + // No propagators defined so far + return {}; + } [[nodiscard]] d_t getZeroValue() const { return LLVMZeroValue::getInstance(); diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp index 08224daa6b..99b1f43f0d 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -184,8 +184,7 @@ auto monoifds::TaintAnalysis::LocalAnalysis::returnFlow(n_t CallSite, d_t Fact) return {CallSite}; } -auto monoifds::TaintAnalysis::LocalAnalysis::invReturnFlow(n_t CallSite, - d_t Fact) +auto monoifds::TaintAnalysis::LocalAnalysis::invCallFlow(n_t CallSite, d_t Fact) -> llvm::SmallVector { if (llvm::isa(Fact)) { // Pass global variables as is, if desired From 79fe2458d6601bbfe4d8e144cd6616ee609ab585 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 10 Apr 2026 13:25:28 +0200 Subject: [PATCH 08/29] Add unittest for MonoIFDS and make it pass --- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 32 +- .../PhasarLLVM/DataFlow/MonoIfds/AliasCache.h | 2 + include/phasar/Utils/AnalysisPrinterBase.h | 2 +- .../DataFlow/MonoIfds/AliasCache.cpp | 6 + .../Problems/MonoIFDSTaintAnalysis.cpp | 8 +- unittests/PhasarLLVM/DataFlow/CMakeLists.txt | 1 + .../Problems/IFDSTaintAnalysisTest.cpp | 44 +-- .../Problems/MonoIFDSTaintAnalysisTest.cpp | 306 +++++++++++++++++- unittests/TestUtils/TaintTest.h | 51 +++ 9 files changed, 393 insertions(+), 59 deletions(-) create mode 100644 unittests/TestUtils/TaintTest.h diff --git 
a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 79210a0778..236a307e24 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -45,10 +45,16 @@ namespace psr::monoifds { +class MonoIFDFSSolverBase { +public: + static constexpr llvm::StringLiteral LogCategory = "MonoIFDSSolver"; +}; + /// \brief Implements the MonoIFDS algorithm, as presented in "Scaling Bottom-up /// IFDS Taint Analysis with Optimized Data-flow Encoding" by Schiebel and /// Bodden. -template class MonoIFDSSolver { +template +class MonoIFDSSolver : public MonoIFDFSSolverBase { public: using n_t = typename ProblemT::ProblemAnalysisDomain::n_t; using d_t = typename ProblemT::ProblemAnalysisDomain::d_t; @@ -56,8 +62,6 @@ template class MonoIFDSSolver { using f_t = typename ProblemT::ProblemAnalysisDomain::f_t; using v_t = typename ProblemT::ProblemAnalysisDomain::v_t; - static constexpr llvm::StringLiteral LogCategory = "MonoIFDSSolver"; - explicit MonoIFDSSolver(ProblemT *Problem, const i_t *ICF, std::pmr::polymorphic_allocator<> Alloc = std::pmr::get_default_resource()) @@ -276,7 +280,7 @@ template class MonoIFDSSolver { }); assert(Driver.empty()); - llvm::errs() << '.'; + // llvm::errs() << '.'; }; const auto RepropagateInRecursion = [&](auto &Driver) { @@ -287,7 +291,7 @@ template class MonoIFDSSolver { assert(Driver.empty()); rescheduleCalls(IState, Driver); - llvm::errs() << '.'; + // llvm::errs() << '.'; } assert(IState.HasNewSummary.empty() && @@ -663,11 +667,16 @@ template class MonoIFDSSolver { IState.Incoming[CalleeFun].insert(Inst); } + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[handleCallSrcSinksAndMayRecurse]: At call to " + << FToString(CalleeFun)); + IState.LocalProblem.requestedEffectAtCall( Inst, CalleeFun, [&](ByConstRef LeakFact) { - PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - "[handleCallSrcSinksAndMayRecurse]: LeakFact: " - << DToString(LeakFact)); + 
PHASAR_LOG_LEVEL_CAT( + DEBUG, LogCategory, + "[handleCallSrcSinksAndMayRecurse]: LeakFact: " + << DToString(LeakFact)); if (const auto *LeakSrc = psr::getOrNull(LocalState, LeakFact)) { reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); @@ -677,9 +686,10 @@ template class MonoIFDSSolver { // Generate taints from zero: IState.LocalProblem.generateFactsAtCall( Inst, CalleeFun, [&](ByConstRef GenFact) { - PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - "[handleCallSrcSinksAndMayRecurse]: GenFact: " - << DToString(GenFact)); + PHASAR_LOG_LEVEL_CAT( + DEBUG, LogCategory, + "[handleCallSrcSinksAndMayRecurse]: GenFact: " + << DToString(GenFact)); // Note: Assume, this gets called for all relevant aliases as well LocalState[GenFact].insert(SourceFactId(0)); }); diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h index 2f43603a1d..660a86f077 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h @@ -29,6 +29,8 @@ namespace psr::monoifds { class AliasCache { public: + static constexpr llvm::StringLiteral LogCategory = "monoifds::AliasCache"; + // Passed AI should already be FilteredAliasSet or similar explicit AliasCache( LLVMAliasIteratorRef AI [[clang::lifetime_capture_by(this)]], diff --git a/include/phasar/Utils/AnalysisPrinterBase.h b/include/phasar/Utils/AnalysisPrinterBase.h index f7689d20a5..2a231cd973 100644 --- a/include/phasar/Utils/AnalysisPrinterBase.h +++ b/include/phasar/Utils/AnalysisPrinterBase.h @@ -14,11 +14,11 @@ namespace psr { /// \brief A generic class that serves as the basis for a custom analysis /// printer implementation. 
template class AnalysisPrinterBase { +public: using n_t = typename AnalysisDomainTy::n_t; using d_t = typename AnalysisDomainTy::d_t; using l_t = typename AnalysisDomainTy::l_t; -public: template void onResult(n_t Instr, D &&DfFact, L &&LatticeElement, DataFlowAnalysisType AnalysisType) { diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp index d031b9c656..5b6ec07856 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/AliasCache.cpp @@ -1,5 +1,8 @@ #include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" +#include "phasar/Utils/Logger.h" +#include "phasar/Utils/Printer.h" + #include "llvm/IR/Instruction.h" using namespace psr; @@ -14,13 +17,16 @@ monoifds::AliasCache::getAliasSet(const llvm::Value *Fact, Accesses++; if (Inserted) { Misses++; + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "For " << DToString(Fact)); AI.forallAliasesOf(Fact, At, [this, &Vec = It->second](const auto *Alias) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Alias " << DToString(Alias)); const auto *AliasBase = Alias->stripPointerCastsAndAliases(); if (const auto *Glob = llvm::dyn_cast(AliasBase); Glob && !PermittedGlobals->contains(Glob)) { return; } if (!SkipSeedsCallBack || !SkipSeedsCallBack(Alias)) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " --> add"); Vec.push_back(Alias); } }); diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp index 99b1f43f0d..3fc8eafecf 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -278,8 +278,12 @@ void monoifds::TaintAnalysis::LocalAnalysis::generateFactsAtCall( n_t CS, f_t Callee, llvm::function_ref GenFact) { forallGeneratedFacts(*TA->Config, llvm::cast(CS), Callee, [this, CS, GenFact](const auto *Fact) { - auto Aliases = AC.getAliasSet(Fact, 
CS); - llvm::for_each(Aliases, GenFact); + if (Fact->getType()->isPointerTy()) { + auto Aliases = AC.getAliasSet(Fact, CS); + llvm::for_each(Aliases, GenFact); + } else { + std::invoke(GenFact, Fact); + } }); } diff --git a/unittests/PhasarLLVM/DataFlow/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/CMakeLists.txt index 3af5da4864..f493bf26c7 100644 --- a/unittests/PhasarLLVM/DataFlow/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(IfdsIde) add_subdirectory(Mono) +add_subdirectory(MonoIfds) add_subdirectory(PathSensitivity) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp index 25addb34de..a20bd5398b 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysisTest.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/Value.h" #include "SrcCodeLocationEntry.h" +#include "TaintTest.h" #include "TestConfig.h" #include "gtest/gtest.h" @@ -34,49 +35,6 @@ class IFDSTaintAnalysisTest : public ::testing::Test { std::optional TaintProblem; std::optional TSF; - static bool isDummySrcFun(llvm::StringRef Name) { - return Name == "_Z6sourcev" || Name == "source"; - } - static bool isDummySinkFun(llvm::StringRef Name) { - return Name == "_Z4sinki" || Name == "sink"; - } - static LLVMTaintConfig getDefaultConfig() { - auto SourceCB = [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - isDummySrcFun(Call->getCalledFunction()->getName())) { - Ret.insert(Call); - } - return Ret; - }; - auto SinkCB = [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - isDummySinkFun(Call->getCalledFunction()->getName())) { - assert(Call->arg_size() > 0); - 
Ret.insert(Call->getArgOperand(0)); - } - return Ret; - }; - return LLVMTaintConfig(std::move(SourceCB), std::move(SinkCB)); - } - - static LLVMTaintConfig getDoubleFreeConfig() { - auto SourceCB = [](const llvm::Instruction *Inst) { - std::set Ret; - if (const auto *Call = llvm::dyn_cast(Inst); - Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "free") { - Ret.insert(Call->getArgOperand(0)); - } - return Ret; - }; - - return LLVMTaintConfig(SourceCB, SourceCB); - } - void initialize(const llvm::Twine &IRFile) { HA.emplace(IRFile, EntryPoints); diff --git a/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp index 56bddbbf98..d620b9e0a8 100644 --- a/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysisTest.cpp @@ -1,8 +1,310 @@ -#include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" +#include "phasar/ControlFlow/CGSCCs.h" +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSSolver.h" +#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/PhasarLLVM/Pointer/CachedLLVMAliasIterator.h" +#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasIterator.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" +#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" +#include "phasar/Utils/AnalysisPrinterBase.h" +#include "phasar/Utils/DebugOutput.h" +#include 
"phasar/Utils/Logger.h" +#include "phasar/Utils/MapUtils.h" +#include "phasar/Utils/Printer.h" + +#include "llvm/ADT/Twine.h" + +#include "SrcCodeLocationEntry.h" +#include "TaintTest.h" +#include "TestConfig.h" #include "gtest/gtest.h" -// TODO +#include +#include +#include + +namespace { +using namespace psr; +using namespace psr::unittest; + +class GroundTruthCollector + : public AnalysisPrinterBase< + monoifds::TaintAnalysis::ProblemAnalysisDomain> { +public: + using base_t = + AnalysisPrinterBase; + + using typename base_t::d_t; + using typename base_t::n_t; + + GroundTruthCollector(std::map> GroundTruth, + std::source_location Loc) + : GroundTruth(std::move(GroundTruth)), Loc(Loc) {} + + void doOnResult(n_t Inst, d_t Fact, l_t /*Value*/, + DataFlowAnalysisType /*TAType*/) override { + auto *Inner = getOrNull(GroundTruth, Inst); + ASSERT_TRUE(Inner && Inner->erase(Fact)) + << "Unexpected fact " << DToString(Fact) << " found at " + << NToString(Inst) << ";\nCalled from " << loc(); + if (Inner && Inner->empty()) { + GroundTruth.erase(Inst); + } + } + + void doOnFinalize() override { + ASSERT_TRUE(GroundTruth.empty()) + << "Expected facts not found: " << PrettyPrinter{GroundTruth} + << ";\nCalled from " << loc(); + } + + [[nodiscard]] std::string loc() const { + return std::string(Loc.file_name()) + ":" + std::to_string(Loc.line()) + + ":" + std::to_string(Loc.column()); + } + + std::map> GroundTruth; + std::source_location Loc; +}; + +class MonoIFDSTaintAnalysisTest : public ::testing::Test { +protected: + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("taint_analysis/"); + static inline const std::vector EntryPoints = {"main"}; + + using GroundTruthTy = + std::map>; + + // void SetUp() override { + // psr::Logger::initializeStderrLogger( + // SeverityLevel::DEBUG, + // monoifds::MonoIFDFSSolverBase::LogCategory.str()); + // } + + void doAnalysisAndCompareResults( + const llvm::Twine &IRFile, const GroundTruthTy &GroundTruth, + LLVMTaintConfig 
*CustomConfig = nullptr, + std::source_location Loc = std::source_location::current()) { + std::optional ConfigBuf; + const auto &Config = + CustomConfig ? *CustomConfig : ConfigBuf.emplace(getDefaultConfig()); + + HelperAnalyses HA(PathToLlFiles + IRFile, EntryPoints, + {.CGTy = CallGraphAnalysisType::VTA}); + ASSERT_TRUE(HA.getProjectIRDB().isValid()); + + auto &IRDB = HA.getProjectIRDB(); + auto &ICF = HA.getICFG(); + + const auto &CG = ICF.getCallGraph(); + auto FC = compressFunctions(CG, psr::getEntryFunctions(IRDB, EntryPoints)); + + auto SCCs = computeCGSCCs(CG, ICF, FC); + auto SCCCallers = computeCGSCCCallers(CG, ICF, FC, SCCs); + + auto UsedGlobals = computeUsedGlobals(IRDB, FC, SCCs, SCCCallers); + + FilteredLLVMAliasIterator FAI(&HA.getAliasInfo()); + CachedLLVMAliasIterator CAI(&FAI); + GroundTruthCollector GT(convertTestingLocationSetMapInIR(GroundTruth, IRDB), + Loc); + monoifds::TaintAnalysis TA(&Config, &UsedGlobals, &CAI); + TA.setAnalysisPrinter(>); + monoifds::MonoIFDSSolver Solver(&TA, &ICF); + Solver.setCGSCCs(&SCCs).setFunctionCompressor(&FC); + GT.onInitialize(); + Solver.solve(); + + GT.onFinalize(); + } +}; + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_01) { + auto Entry = LineColFun{.Line = 6, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 6, .Col = 8, .InFunction = "main"}; + doAnalysisAndCompareResults("dummy_source_sink/taint_01_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_01_m2r) { + auto Entry = LineColFun{.Line = 6, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 5, .Col = 11, .InFunction = "main"}; + doAnalysisAndCompareResults("dummy_source_sink/taint_01_cpp_m2r_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_02) { + // source() is not called, so no leak + doAnalysisAndCompareResults("dummy_source_sink/taint_02_cpp_dbg.ll", {}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_03) { + auto Entry = 
LineColFun{.Line = 6, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 6, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_03_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_04) { + auto Entry = LineColFun{.Line = 6, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 6, .Col = 8, .InFunction = "main"}; + auto EntryThree = LineColFun{.Line = 8, .Col = 3, .InFunction = "main"}; + auto EntryFour = LineColFun{.Line = 8, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_04_cpp_dbg.ll", + { + {Entry, {EntryTwo}}, + {EntryThree, {EntryFour}}, + }); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_05) { + auto Entry = LineColFun{.Line = 6, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 6, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_05_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_06) { + // source() is not called, so no leak + doAnalysisAndCompareResults("dummy_source_sink/taint_06_cpp_m2r_dbg.ll", {}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, SRetTest_01) { + auto SinkCall = LineColFun{.Line = 21, .Col = 3, .InFunction = "main"}; + auto BsdataAt0 = LineColFunOp{ + .Line = 21, + .Col = 8, + .InFunction = "main", + .OpCode = llvm::Instruction::Load, + }; + + doAnalysisAndCompareResults("dummy_source_sink/sret_c_dbg.ll", + {{SinkCall, {BsdataAt0}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_01) { + auto Entry = LineColFun{.Line = 12, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 12, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_01_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_01_m2r) { + auto Entry = LineColFun{.Line = 12, .Col = 3, 
.InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 10, .Col = 14, .InFunction = "main"}; + + doAnalysisAndCompareResults( + "dummy_source_sink/taint_exception_01_cpp_m2r_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_02) { + // source() is not called, so no leak + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_02_cpp_dbg.ll", + {}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_03) { + auto Entry = LineColFun{.Line = 11, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 11, .Col = 8, .InFunction = "main"}; + auto EntryThree = LineColFun{.Line = 14, .Col = 3, .InFunction = "main"}; + auto EntryFour = LineColFun{.Line = 14, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_03_cpp_dbg.ll", + { + {Entry, {EntryTwo}}, + {EntryThree, {EntryFour}}, + }); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_04) { + auto Entry = LineColFun{.Line = 16, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 16, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_04_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_05) { + auto Entry = LineColFun{.Line = 16, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 16, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_05_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_06) { + auto Entry = LineColFun{.Line = 13, .Col = 5, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 13, .Col = 10, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_06_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_07) { + auto 
Entry = LineColFun{.Line = 14, .Col = 5, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 14, .Col = 10, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_07_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_08) { + auto Entry = LineColFun{.Line = 19, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 19, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_08_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_09) { + auto Entry = LineColFun{.Line = 20, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 20, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_09_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_ExceptionHandling_10) { + auto Entry = LineColFun{.Line = 19, .Col = 5, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 19, .Col = 10, .InFunction = "main"}; + + doAnalysisAndCompareResults("dummy_source_sink/taint_exception_10_cpp_dbg.ll", + {{Entry, {EntryTwo}}}); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_DoubleFree_01) { + auto DoubleFreeConf = getDoubleFreeConfig(); + auto Entry = LineColFun{.Line = 6, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 6, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("double_free_01_c_dbg.ll", {{Entry, {EntryTwo}}}, + &DoubleFreeConf); +} + +TEST_F(MonoIFDSTaintAnalysisTest, TaintTest_DoubleFree_02) { + auto DoubleFreeConf = getDoubleFreeConfig(); + auto Entry = LineColFun{.Line = 8, .Col = 3, .InFunction = "main"}; + auto EntryTwo = LineColFun{.Line = 8, .Col = 8, .InFunction = "main"}; + + doAnalysisAndCompareResults("double_free_02_c_dbg.ll", {{Entry, {EntryTwo}}}, + &DoubleFreeConf); +} + +} // namespace int main(int Argc, char 
**Argv) { ::testing::InitGoogleTest(&Argc, Argv); diff --git a/unittests/TestUtils/TaintTest.h b/unittests/TestUtils/TaintTest.h new file mode 100644 index 0000000000..2371209043 --- /dev/null +++ b/unittests/TestUtils/TaintTest.h @@ -0,0 +1,51 @@ +#pragma once + +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/InstrTypes.h" + +namespace psr::unittest { +inline bool isDummySrcFun(llvm::StringRef Name) { + return Name == "_Z6sourcev" || Name == "source"; +} +inline bool isDummySinkFun(llvm::StringRef Name) { + return Name == "_Z4sinki" || Name == "sink"; +} +inline LLVMTaintConfig getDefaultConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + isDummySrcFun(Call->getCalledFunction()->getName())) { + Ret.insert(Call); + } + return Ret; + }; + auto SinkCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + isDummySinkFun(Call->getCalledFunction()->getName())) { + assert(Call->arg_size() > 0); + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + return LLVMTaintConfig(std::move(SourceCB), std::move(SinkCB)); +} + +inline LLVMTaintConfig getDoubleFreeConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "free") { + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + + return LLVMTaintConfig(SourceCB, SourceCB); +} +} // namespace psr::unittest From f281094523bb804beb24b7eabd35afd3af23f1e4 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 13 Apr 2026 13:39:43 +0200 Subject: [PATCH 09/29] Add some more comments --- include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h | 2 ++ include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h | 
3 ++- include/phasar/DataFlow/MonoIfds/IterationStrategy.h | 8 +++++--- include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h | 8 +++++++- include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h | 3 ++- include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 2 +- .../Controller/AnalysisControllerXMonoIFDSTaint.cpp | 2 +- 7 files changed, 20 insertions(+), 8 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h b/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h index 294f866e2e..b78a8ea488 100644 --- a/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h +++ b/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h @@ -12,6 +12,8 @@ #include "phasar/Utils/ArraySet.h" namespace psr::monoifds { + +/// \brief Simple worklist, based on a de-duplicating queue template class ArraySetDriver { public: void push(ItemT Item) { WL.insert(std::move(Item)); } diff --git a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h index 8deb2f11d2..f8103e0a44 100644 --- a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h +++ b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h @@ -21,7 +21,8 @@ namespace psr::monoifds { using SourceFactSet = BitSet; /// The local analysis state: TargetFact-->{SourceFact} - +/// +/// \tparam D The type of (target-) data-flow facts template struct DataFlowEnvironment : llvm::SmallDenseMap { using llvm::SmallDenseMap::SmallDenseMap; diff --git a/include/phasar/DataFlow/MonoIfds/IterationStrategy.h b/include/phasar/DataFlow/MonoIfds/IterationStrategy.h index 159c6c9b36..51c5109b1f 100644 --- a/include/phasar/DataFlow/MonoIfds/IterationStrategy.h +++ b/include/phasar/DataFlow/MonoIfds/IterationStrategy.h @@ -15,11 +15,15 @@ #include namespace psr::monoifds { +/// Iteration strategy for intra-procedural propagation enum class IterationStrategy : uint8_t { + /// Simple de-duplicating queue (See ArraySetDriver) DedupFIFOQueue, + /// Reverse-Post-Order queue (see TopoFixpointDriver) 
TopoPrioQueue, + /// Hybrid of TopoPrioQueue in singleton-CG-SCCs and DedupFIFOQueue for larger + /// CG-SCCs Hybrid, - HybridCapped, }; [[nodiscard]] constexpr llvm::StringRef @@ -31,8 +35,6 @@ to_string(IterationStrategy IterStrategy) noexcept { return "topo"; case IterationStrategy::Hybrid: return "hybrid"; - case IterationStrategy::HybridCapped: - return "hybrid-capped"; } llvm_unreachable("All valid IterationStrategy alternatives should be handled " "in the switch above"); diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h index 6a09815030..a8aef1e706 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h @@ -12,10 +12,16 @@ #include "phasar/DataFlow/MonoIfds/IterationStrategy.h" namespace psr::monoifds { +/// Dynamic configuration for the MonoIFDSSolver struct MonoIfdsConfig { + /// Iteration strategy for intra-procedural propagations IterationStrategy IterStrategy = IterationStrategy::DedupFIFOQueue; - bool EnableAggressiveLoopPriorization = false; + /// Dataflow-Environment versioning. Detects and skips redundant propagations + /// at runtime bool EnableEnvVersioning = false; + /// Whether to re-schedule the analysis of recursive call-sites whenever the + /// callee's summary changes (true), or deferring re-scheduling of call-sites + /// until all callee-local paths have been analyzed (false).
bool EagerReturnPropagation = false; }; diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index ed094fc180..f3fdbfcaba 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -13,7 +13,7 @@ #include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" -#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/SCCId.h" #include "phasar/Utils/TypeTraits.h" #include @@ -36,6 +36,7 @@ namespace psr::monoifds { template concept MonoIFDSAnalysisDomain = IsAnalysisDomain; +/// \brief CG-SCC-local analysis template concept LocalMonoIFDSProblem = requires(T &Problem, DataFlowEnvironment &InOut, diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 236a307e24..bdbbdeeff5 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -250,7 +250,7 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { return false; } - if (IterStrategy == IterationStrategy::HybridCapped) { + if (IterStrategy == IterationStrategy::Hybrid) { // return SCCSize < 20; return SCCSize == 1; } diff --git a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp index e84b9a4c80..00ea3df62f 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp @@ -47,7 +47,7 @@ void controller::executeMonoIFDSTaint(AnalysisController &Data) { if (Data.EmitterOptions & AnalysisControllerEmitterOptions::EmitStatisticsAsText) { MeasureTime.emplace([](auto Elapsed) { - llvm::outs() << "Elapsed: " << hms{Elapsed} << '\n'; + llvm::outs() << "MonoIFDSSolver Elapsed: " << hms{Elapsed} << '\n'; }); } From cae247c3eed6e9b2e4b45072b856faac600d435f 
Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 14 Apr 2026 10:48:48 +0200 Subject: [PATCH 10/29] Add shouldBeInSummary --- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 6 +-- .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 4 +- .../Problems/MonoIFDSTaintAnalysis.cpp | 49 +++++++++++++++++++ 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index bdbbdeeff5..cb155992b4 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -237,9 +237,6 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { void computeFixpointForSCC(SCCId CurrSCC, llvm::ArrayRef CurrFuns) { - PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - "[computeFixpointForSCC]: " << CurrSCC.Value); - const size_t SCCSize = CurrFuns.size(); const bool InRecursion = SCCSize > 1; IntermediateState IState(Problem, &PoolRes, CurrSCC, InRecursion); @@ -828,6 +825,9 @@ template void MonoIFDSSolver::solve() { // Step 3: Analyze each CG-SCC in isolation for (const auto &[SCC, CurrFuns] : SCCs->NodesInSCC.enumerate()) { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "[computeFixpointForSCC]: " << SCC.Value << '/' + << SCCs->size()); computeFixpointForSCC(SCC, CurrFuns); } } diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index e52874ef9e..9df7c8350c 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -109,7 +109,9 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { Printer->onFinalize(OS); } - // TODO: shouldBeInSummary() + // Optional API function: Filter out facts that do not need to go into a + // procedure summary + [[nodiscard]] bool shouldBeInSummary(d_t ExitFact, n_t
ExitInst); private: MaybeUniquePtr> Printer = diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp index 3fc8eafecf..01aae63bdc 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -292,3 +292,52 @@ void monoifds::TaintAnalysis::LocalAnalysis::requestedEffectAtCall( forallLeakedFacts(*TA->Config, llvm::cast(CS), Callee, LeakFact); } + +bool monoifds::TaintAnalysis::shouldBeInSummary(d_t ExitFact, n_t ExitInst) { + if (llvm::isa(ExitFact)) { + // Global vars should be in summary + return !llvm::isa(ExitFact); + } + + const auto *RetStmt = llvm::dyn_cast(ExitInst); + if (RetStmt && RetStmt->getReturnValue() == ExitFact) { + // The return value should be in summary + return true; + } + + const auto *Fun = ExitInst->getFunction(); + if (Fun->isVarArg()) { + if (const auto *Alloc = llvm::dyn_cast(ExitFact)) { + const auto *AllocTy = Alloc->getAllocatedType(); + if (AllocTy->isArrayTy() && AllocTy->getArrayNumElements() > 0 && + AllocTy->getArrayElementType()->isStructTy() && + AllocTy->getArrayElementType()->getStructName() == + "struct.__va_list_tag") { + + return true; + } + } + } + + if (llvm::isa(ExitFact)) { + // Locals do not escape + return false; + } + + // Only output parameters can escape (i.e., pointer args) + if (!ExitFact->getType()->isPointerTy()) { + return false; + } + + if (const auto *Arg = llvm::dyn_cast(ExitFact)) { + if (Arg->hasByValAttr()) { + // This parameter is actually passed by value in the src code, just for + // ABI reasons it appears as being passed by pointer + return false; + } + + return true; + } + + return false; +} From 111713b2bf7941ce9ca22754d315d0c054411640 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 14 Apr 2026 11:08:58 +0200 Subject: [PATCH 11/29] minor --- .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 2 +- 
.../MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index 9df7c8350c..458e652d3e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -20,7 +20,7 @@ #include "phasar/Utils/Compressor.h" #include "phasar/Utils/MaybeUniquePtr.h" #include "phasar/Utils/NullAnalysisPrinter.h" -#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/SCCId.h" #include "phasar/Utils/UsedGlobalsHolder.h" #include "phasar/Utils/Utilities.h" diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp index 01aae63bdc..59b4da6e9e 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -7,7 +7,6 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/MapUtils.h" -#include "phasar/Utils/SCCGeneric.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -306,20 +305,12 @@ bool monoifds::TaintAnalysis::shouldBeInSummary(d_t ExitFact, n_t ExitInst) { } const auto *Fun = ExitInst->getFunction(); - if (Fun->isVarArg()) { - if (const auto *Alloc = llvm::dyn_cast(ExitFact)) { - const auto *AllocTy = Alloc->getAllocatedType(); - if (AllocTy->isArrayTy() && AllocTy->getArrayNumElements() > 0 && - AllocTy->getArrayElementType()->isStructTy() && - AllocTy->getArrayElementType()->getStructName() == - "struct.__va_list_tag") { - - return true; - } + + if (const auto *Alloc = llvm::dyn_cast(ExitFact)) { + if (Fun->isVarArg() && psr::isVaListAlloca(*Alloc)) { + return true; } - } - if 
(llvm::isa(ExitFact)) { // Locals do not escape return false; } From dd6cd593253c685a087a393a4b977d35ac01ed4b Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 17 Apr 2026 14:53:29 +0200 Subject: [PATCH 12/29] Make ICFG independent from analysis domain for MonoIFDSSolver --- include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index cb155992b4..533daac01e 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -11,6 +11,7 @@ #include "phasar/ControlFlow/CFG.h" #include "phasar/ControlFlow/ControlFlowOrder.h" +#include "phasar/ControlFlow/ICFG.h" #include "phasar/DataFlow/MonoIfds/ArraySetWorkList.h" #include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" #include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" @@ -53,12 +54,13 @@ class MonoIFDFSSolverBase { /// \brief Implements the MonoIFDS algorithm, as presented in "Scaling Bottom-up /// IFDS Taint Analysis with Optimized Data-flow Encoding" by Schiebel and /// Bodden. 
-template +template class MonoIFDSSolver : public MonoIFDFSSolverBase { public: using n_t = typename ProblemT::ProblemAnalysisDomain::n_t; using d_t = typename ProblemT::ProblemAnalysisDomain::d_t; - using i_t = typename ProblemT::ProblemAnalysisDomain::i_t; + using i_t = ICFGTy; using f_t = typename ProblemT::ProblemAnalysisDomain::f_t; using v_t = typename ProblemT::ProblemAnalysisDomain::v_t; @@ -814,7 +816,8 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { llvm::SmallDenseMap> Leaks{}; }; -template void MonoIFDSSolver::solve() { +template +void MonoIFDSSolver::solve() { // Step 1: Check for pre-analysis results: If any of them is null, create them initialize(); From 1cbb3127fecaf4f18cc5ce079ab466ce813c0e81 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 17 Apr 2026 16:02:14 +0200 Subject: [PATCH 13/29] Add functionCompressor, usedGlobals, and CGSCCs to helperAnalyses to improve usability --- include/phasar/ControlFlow/CGSCCs.h | 29 +++++++++--- include/phasar/ControlFlow/ICFG.h | 9 +++- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 1 + include/phasar/PhasarLLVM/HelperAnalyses.h | 16 ++++++- include/phasar/Utils/FunctionId.h | 2 + include/phasar/Utils/UsedGlobalsHolder.h | 3 +- lib/PhasarLLVM/HelperAnalyses.cpp | 47 +++++++++++++++++++ lib/PhasarLLVM/Utils/UsedGlobals.cpp | 11 ++--- .../Controller/AnalysisControllerInternal.h | 3 +- .../AnalysisControllerXMonoIFDSTaint.cpp | 29 ++++-------- 10 files changed, 111 insertions(+), 39 deletions(-) diff --git a/include/phasar/ControlFlow/CGSCCs.h b/include/phasar/ControlFlow/CGSCCs.h index 60e1d6ab6c..b7061de4b9 100644 --- a/include/phasar/ControlFlow/CGSCCs.h +++ b/include/phasar/ControlFlow/CGSCCs.h @@ -11,8 +11,8 @@ #include "phasar/ControlFlow/CFG.h" #include "phasar/ControlFlow/CallGraph.h" +#include "phasar/ControlFlow/ICFG.h" #include "phasar/Utils/BitSet.h" -#include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" #include "phasar/Utils/IotaIterator.h" #include 
"phasar/Utils/SCCGeneric.h" @@ -25,9 +25,9 @@ namespace psr { // the bottom-up iteration order. template C> requires InstructionClassifier -SCCHolder +[[nodiscard]] SCCHolder computeCGSCCs(const psr::CallGraph &CG, const C &CF, - const Compressor &Functions) { + const FunctionCompressor &Functions) { SCCHolder Ret{}; @@ -113,18 +113,25 @@ computeCGSCCs(const psr::CallGraph &CG, const C &CF, return Ret; } +template + requires InstructionClassifier +[[nodiscard]] SCCHolder +computeCGSCCs(const I &ICF, + const FunctionCompressor &Functions) { + return computeCGSCCs(ICF.getCallGraph(), ICF, Functions); +} + template -SCCDependencyGraph +[[nodiscard]] SCCDependencyGraph computeCGSCCCallers(const psr::CallGraph &CG, const CFGOf auto &CF, - const Compressor &Functions, + const FunctionCompressor &Functions, const SCCHolder &SCCs) { SCCDependencyGraph Ret; Ret.ChildrenOfSCC.resize(SCCs.size()); BitSet> Leaves(SCCs.size(), true); - for (auto FunId : iota(Functions.size())) { - const auto *Fun = Functions[FunId]; + for (auto [FunId, Fun] : Functions.enumerate()) { auto SCC = SCCs.SCCOfNode[FunId]; for (const auto &CS : CG.getCallersOf(Fun)) { @@ -146,4 +153,12 @@ computeCGSCCCallers(const psr::CallGraph &CG, const CFGOf auto &CF, return Ret; } +template +[[nodiscard]] SCCDependencyGraph +computeCGSCCCallers(const I &ICF, + const FunctionCompressor &Functions, + const SCCHolder &SCCs) { + return computeCGSCCCallers(ICF.getCallGraph(), ICF, Functions, SCCs); +} + } // namespace psr diff --git a/include/phasar/ControlFlow/ICFG.h b/include/phasar/ControlFlow/ICFG.h index d3660fe11f..d6129728a0 100644 --- a/include/phasar/ControlFlow/ICFG.h +++ b/include/phasar/ControlFlow/ICFG.h @@ -8,6 +8,7 @@ *****************************************************************************/ #pragma once +#include "phasar/ControlFlow/CFG.h" #include "phasar/ControlFlow/CallGraphBase.h" #include "phasar/Utils/Nullable.h" #include "phasar/Utils/TypeTraits.h" @@ -19,8 +20,8 @@ namespace psr { template 
-concept ICFG = requires(const T &ICF, llvm::StringRef Name, - typename T::n_t Inst, typename T::f_t Fun) { +concept ICFG = CFG && requires(const T &ICF, llvm::StringRef Name, + typename T::n_t Inst, typename T::f_t Fun) { typename T::f_t; typename T::n_t; @@ -61,4 +62,8 @@ concept ICFGDump = requires(const T &ICF, llvm::raw_ostream &OS) { ICF.print(OS); ICF.printAsJson(OS); }; + +template +concept ICFGOf = ICFG && std::same_as && + std::same_as; } // namespace psr diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 533daac01e..103d163490 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -10,6 +10,7 @@ *****************************************************************************/ #include "phasar/ControlFlow/CFG.h" +#include "phasar/ControlFlow/CGSCCs.h" #include "phasar/ControlFlow/ControlFlowOrder.h" #include "phasar/ControlFlow/ICFG.h" #include "phasar/DataFlow/MonoIfds/ArraySetWorkList.h" diff --git a/include/phasar/PhasarLLVM/HelperAnalyses.h b/include/phasar/PhasarLLVM/HelperAnalyses.h index 156e1622bf..ec75980e46 100644 --- a/include/phasar/PhasarLLVM/HelperAnalyses.h +++ b/include/phasar/PhasarLLVM/HelperAnalyses.h @@ -16,6 +16,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h" #include "phasar/Pointer/UnionFindAliasAnalysisType.h" +#include "phasar/Utils/FunctionId.h" #include "llvm/ADT/Twine.h" @@ -25,6 +26,7 @@ namespace llvm { class Module; +class GlobalVariable; } // namespace llvm namespace psr { @@ -32,7 +34,9 @@ class LLVMProjectIRDB; class DIBasedTypeHierarchy; class LLVMBasedICFG; class LLVMBasedCFG; -class LLVMAliasSet; +template struct SCCHolder; +template struct SCCDependencyGraph; +template struct UsedGlobalsHolder; class HelperAnalyses { // NOLINT(cppcoreguidelines-special-member-functions) public: @@ -66,6 +70,12 @@ class HelperAnalyses { // 
NOLINT(cppcoreguidelines-special-member-functions) [[nodiscard]] DIBasedTypeHierarchy &getTypeHierarchy(); [[nodiscard]] LLVMBasedICFG &getICFG(); [[nodiscard]] LLVMBasedCFG &getCFG(); + [[nodiscard]] FunctionCompressor & + getCompressedFunctions(); + [[nodiscard]] const SCCHolder &getCGSCCs(); + [[nodiscard]] const SCCDependencyGraph &getCGSCCCallers(); + [[nodiscard]] const UsedGlobalsHolder & + getUsedGlobals(); private: std::unique_ptr IRDB; @@ -73,6 +83,10 @@ class HelperAnalyses { // NOLINT(cppcoreguidelines-special-member-functions) std::unique_ptr TH; std::unique_ptr ICF; std::unique_ptr CFG; + std::unique_ptr> FC; + std::unique_ptr> SCCs; + std::unique_ptr> SCCCallers; + std::unique_ptr> UsedGlobals; // IRDB std::string IRFile; diff --git a/include/phasar/Utils/FunctionId.h b/include/phasar/Utils/FunctionId.h index a063b1ee48..ddba7b3bd0 100644 --- a/include/phasar/Utils/FunctionId.h +++ b/include/phasar/Utils/FunctionId.h @@ -20,4 +20,6 @@ PHASAR_STRONG_TYPEDEF(psr, uint32_t, FunctionId); namespace psr { std::string to_string(FunctionId FId); + +template using FunctionCompressor = Compressor; } // namespace psr diff --git a/include/phasar/Utils/UsedGlobalsHolder.h b/include/phasar/Utils/UsedGlobalsHolder.h index bcb297267c..d6e311a0c0 100644 --- a/include/phasar/Utils/UsedGlobalsHolder.h +++ b/include/phasar/Utils/UsedGlobalsHolder.h @@ -9,7 +9,8 @@ * Fabian Schiebel and others *****************************************************************************/ -#include "phasar/Utils/SCCGeneric.h" +#include "phasar/Utils/SCCId.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseSet.h" diff --git a/lib/PhasarLLVM/HelperAnalyses.cpp b/lib/PhasarLLVM/HelperAnalyses.cpp index 6897c6980e..3f1041a4f5 100644 --- a/lib/PhasarLLVM/HelperAnalyses.cpp +++ b/lib/PhasarLLVM/HelperAnalyses.cpp @@ -1,6 +1,8 @@ #include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/ControlFlow/CGSCCs.h" #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" 
+#include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/RTAResolver.h" @@ -9,6 +11,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasSetData.h" #include "phasar/PhasarLLVM/Pointer/LLVMUnionFindAliasSet.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" #include "phasar/Pointer/AliasAnalysisType.h" #include @@ -129,4 +132,48 @@ LLVMBasedCFG &HelperAnalyses::getCFG() { return *CFG; } +FunctionCompressor & +HelperAnalyses::getCompressedFunctions() { + if (!FC) { + auto Funs = compressFunctions( + getICFG().getCallGraph(), + psr::getEntryFunctions(getProjectIRDB(), EntryPoints)); + FC = std::make_unique>( + std::move(Funs)); + } + + return *FC; +} + +const SCCHolder &HelperAnalyses::getCGSCCs() { + if (!SCCs) { + auto &ICF = getICFG(); + auto CGSCCs = computeCGSCCs(ICF, getCompressedFunctions()); + SCCs = std::make_unique>(std::move(CGSCCs)); + } + return *SCCs; +} + +const SCCDependencyGraph &HelperAnalyses::getCGSCCCallers() { + if (!SCCCallers) { + auto SCCC = + computeCGSCCCallers(getICFG(), getCompressedFunctions(), getCGSCCs()); + SCCCallers = + std::make_unique>(std::move(SCCC)); + } + return *SCCCallers; +} + +const UsedGlobalsHolder & +HelperAnalyses::getUsedGlobals() { + if (!UsedGlobals) { + auto UG = computeUsedGlobals(getProjectIRDB(), getCompressedFunctions(), + getCGSCCs(), getCGSCCCallers()); + UsedGlobals = + std::make_unique>( + std::move(UG)); + } + return *UsedGlobals; +} + } // namespace psr diff --git a/lib/PhasarLLVM/Utils/UsedGlobals.cpp b/lib/PhasarLLVM/Utils/UsedGlobals.cpp index ca7f766eb6..3217f690a4 100644 --- a/lib/PhasarLLVM/Utils/UsedGlobals.cpp +++ b/lib/PhasarLLVM/Utils/UsedGlobals.cpp @@ -1,16 +1,15 @@ #include "phasar/PhasarLLVM/Utils/UsedGlobals.h" +#include 
"phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" #include "phasar/Utils/SCCGeneric.h" -#include "llvm/IR/Module.h" - -#include -#include -#include -#include +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" using namespace psr; diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternal.h b/tools/phasar-cli/Controller/AnalysisControllerInternal.h index 539b7d1439..0d80742e8a 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternal.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternal.h @@ -19,12 +19,11 @@ #include "phasar/Utils/IO.h" #include "phasar/Utils/Timer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include "AnalysisController.h" -#include - namespace psr { template class IDESolver; } // namespace psr diff --git a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp index 00ea3df62f..ce01c16bf3 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp @@ -7,39 +7,28 @@ * Fabian Schiebel and others *****************************************************************************/ -#include "phasar/ControlFlow/CGSCCs.h" #include "phasar/DataFlow/MonoIfds/MonoIFDSSolver.h" -#include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" -#include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" #include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" #include "phasar/PhasarLLVM/Pointer/CachedLLVMAliasIterator.h" #include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasIterator.h" -#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" -#include "AnalysisControllerInternalIDE.h" +#include "AnalysisController.h" +#include 
"AnalysisControllerInternal.h" using namespace psr; void controller::executeMonoIFDSTaint(AnalysisController &Data) { - auto Config = makeTaintConfig(Data); - - auto &IRDB = Data.HA->getProjectIRDB(); - auto &ICF = Data.HA->getICFG(); - - const auto &CG = ICF.getCallGraph(); - auto FC = - compressFunctions(CG, psr::getEntryFunctions(IRDB, Data.EntryPoints)); - - auto SCCs = computeCGSCCs(CG, ICF, FC); - auto SCCCallers = computeCGSCCCallers(CG, ICF, FC, SCCs); - - auto UsedGlobals = computeUsedGlobals(IRDB, FC, SCCs, SCCCallers); auto AI = Data.HA->getAliasInfo(); FilteredLLVMAliasIterator FAI(AI); CachedLLVMAliasIterator CAI(&FAI); - monoifds::TaintAnalysis TA(&Config, &UsedGlobals, &CAI); - monoifds::MonoIFDSSolver Solver(&TA, &ICF); + + auto Config = makeTaintConfig(Data); + monoifds::TaintAnalysis TA(&Config, &Data.HA->getUsedGlobals(), &CAI); + + monoifds::MonoIFDSSolver Solver(&TA, &Data.HA->getICFG()); + auto &FC = Data.HA->getCompressedFunctions(); + const auto &SCCs = Data.HA->getCGSCCs(); Solver.setCGSCCs(&SCCs).setFunctionCompressor(&FC); { From 04b2a99a5ddce1d7baf40950dbeff7943c200c64 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 27 Apr 2026 16:40:14 +0200 Subject: [PATCH 14/29] minor --- .../DataFlow/MonoIfds/MonoIFDSProblem.h | 5 +++-- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index f3fdbfcaba..b7c3015b69 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -21,14 +21,15 @@ /// \file /// Defines requirements for an analysis problem that can be solved by the -/// MonoIFDS solver. +/// MonoIFDSSolver. +/// /// Since MonoIFDS solves analyses bottom-up, each CG-SCC is analyzed in /// isolation. The solver will call Problem.localAnalysis(...) 
once per SCC and /// use the returned LocalMonoIFDSProblem to drive the solving process. /// Although the solver currently is only single-threaded, you can expect that /// (also in a multi-threaded future) LocalMonoIFDSProblem instances are not /// accessed by multiple threads at a time, while different instances may be -/// solved on different threads. +/// solved on different threads in parallel. namespace psr::monoifds { diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 103d163490..d92dd355c8 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -33,6 +33,7 @@ #include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/TypedVector.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -63,12 +64,12 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { using d_t = typename ProblemT::ProblemAnalysisDomain::d_t; using i_t = ICFGTy; using f_t = typename ProblemT::ProblemAnalysisDomain::f_t; - using v_t = typename ProblemT::ProblemAnalysisDomain::v_t; explicit MonoIFDSSolver(ProblemT *Problem, const i_t *ICF, std::pmr::polymorphic_allocator<> Alloc = std::pmr::get_default_resource()) - : Problem(Problem), ICF(ICF), MBufRes(Alloc.resource()) {} + : Problem(&assertNotNull(Problem)), ICF(&assertNotNull(ICF)), + MBufRes(Alloc.resource()) {} MonoIFDSSolver &setConfig(MonoIfdsConfig Config) & noexcept { this->Config = Config; @@ -76,12 +77,14 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { } MonoIFDSSolver &setCGSCCs(const SCCHolder *SCCs) & noexcept { + assertNotNull(SCCs); this->SCCs = SCCs; return *this; } MonoIFDSSolver &setFunctionCompressor( const Compressor *Functions) & noexcept { + assertNotNull(Functions); this->Functions = Functions; return *this; } @@ -203,17 +206,16 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { 
Problem->getEntryPoints() } -> psr::is_iterable_over_v; }) { - Functions = std::make_unique>( + Functions = std::make_unique>( compressFunctions(ICF->getCallGraph(), Problem->getEntryPoints())); } else if constexpr (requires() { { Problem->getEntryPoints() } -> psr::is_iterable_over_v; }) { - Functions = - std::make_unique>(compressFunctions( - ICF->getCallGraph(), - psr::getEntryFunctions(*ICF, Problem->getEntryPoints()))); + Functions = std::make_unique>(compressFunctions( + ICF->getCallGraph(), + psr::getEntryFunctions(*ICF, Problem->getEntryPoints()))); } else { throw std::logic_error("The analysis problem " + llvm::getTypeName().str() + @@ -224,6 +226,8 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { } void initializeSCCs() { + assert(Functions && "Functions have been initialized already (see " + "invocation order in initialize())"); SCCs = std::make_unique>( computeCGSCCs(ICF->getCallGraph(), *ICF, *Functions)); } @@ -723,7 +727,6 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { for (const auto &CS : getOrDefault(IState.Incoming, Fun)) { if (auto CallerId = Functions->getOrNull(CS->getFunction())) { - // Driver.push(CS); if (EnableEnvVersioning) { IState.PathEdges[CS].Version++; } From b160f02f122ed63718b283a2aacd1506233463ab Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 30 Apr 2026 11:48:30 +0200 Subject: [PATCH 15/29] Some somments + renaming --- .../DataFlow/MonoIfds/MonoIFDSProblem.h | 36 +++++++++++++++---- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 10 ++---- .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 8 +++-- .../Problems/MonoIFDSTaintAnalysis.cpp | 2 +- .../AnalysisControllerXMonoIFDSTaint.cpp | 6 ++-- 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index b7c3015b69..2a4975395d 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ 
-50,12 +50,14 @@ concept LocalMonoIFDSProblem = /// /// Corresponds to the $flow()$ function in the paper. Problem.normalFlow(InOut, Inst); + /// Intra-procedural data-flow at call-sites. Input facts are passed-in as /// InOut; modifications are performed in-place. Kills facts that may be /// strongly updated by the callee. Don't use it to *generate* facts. /// /// Corresponds to the $callFlow()$ function in the paper. Problem.callToRetFlow(InOut, Inst); + /// Inter-procedural data-flow at exit-statements; Maps callee-facts back /// to the return-site in the caller. As with normal IFDS, this function /// will be called for each incoming Fact, that should be mapped back; @@ -88,13 +90,18 @@ concept LocalMonoIFDSProblem = Problem.summaryFlow(In, InOut, Inst, Fun) } -> std::convertible_to; - /// The special zero value, aka. $\Lambda$. Always holds. Facts that are - /// generated unconditionally originate from zero. + /// The special zero-value, aka. $\Lambda$. Always holds. Facts that are + /// generated unconditionally, originate from zero. + /// + /// Note: The solver guarantees that the zero-value always has + /// SourceFactId 0. { Problem.getZeroValue() } -> std::convertible_to; /// Approximates the source-facts that should hold at the entry of Fun. /// Input the facts in the InOut map as /// `SeedState[Fact].insert(SeedCompressor.getOrInsert(Fact))` + /// + /// Note: This is assumed to be a (conservative) over-approximation! Problem.initialSeeds(InOut, SeedCompressor, Fun); /// At a call-site Inst calling Fun, invokes the given callback for each @@ -111,16 +118,20 @@ concept LocalMonoIFDSProblem = Problem.generateFacts(Inst, [](const typename Dom::d_t & GenFact) {}); /// Invokes the given callback for each LeakFact for which the solver - /// should call onResult(Inst, LeakFact), if LeakFacts holds at Inst. + /// should later call onResult(Inst, LeakFact), if LeakFacts holds at + /// Inst. Here, Inst is assumed to be a call-site that may call Fun. 
/// /// Useful for taint sinks. - Problem.requestedEffectAtCall(Inst, Fun, - [](const typename Dom::d_t & LeakFact) {}); + Problem.requestResultCallbackAtCallSite( + Inst, Fun, [](const typename Dom::d_t & LeakFact) {}); + /// Invokes the given callback for each LeakFact for which the solver - /// should call onResult(Inst, LeakFact), if LeakFacts holds at Inst. + /// should later call onResult(Inst, LeakFact), if LeakFacts holds at + /// Inst. /// /// Useful for taint sinks. - Problem.requestedEffect(Inst, [](const typename Dom::d_t & LeakFact) {}); + Problem.requestResultCallback(Inst, + [](const typename Dom::d_t & LeakFact) {}); /// Notifies the problem that a previously requested leak-Fact now is /// known to hold at Inst. @@ -150,4 +161,15 @@ concept MonoIFDSProblem = /// Pretty-print the analysis results into the given llvm::raw_ostream. Problem.emitTextReport(OS); }; + +/// Optional requirement for a MonoIFDSProblem to better filter function +/// summaries. +template +concept HasShouldBeInSummary = + requires(T &Problem, typename T::ProblemAnalysisDomain::d_t ExitFact, + typename T::ProblemAnalysisDomain::n_t ExitInst) { + { + Problem.shouldBeInSummary(ExitFact, ExitInst) + } -> std::convertible_to; + }; } // namespace psr::monoifds diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index d92dd355c8..888bb2f41b 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -551,11 +551,7 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { auto &Sum = Summaries[CurrFunId].EndSummary; for (auto &&[ExitFact, ExitSrc] : LocalState) { - if constexpr (requires(ProblemT &P) { - { - P.shouldBeInSummary(ExitFact, Inst) - } -> std::convertible_to; - }) { + if constexpr (HasShouldBeInSummary) { if (!Problem->shouldBeInSummary(ExitFact, Inst)) { continue; } @@ -675,7 +671,7 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { 
"[handleCallSrcSinksAndMayRecurse]: At call to " << FToString(CalleeFun)); - IState.LocalProblem.requestedEffectAtCall( + IState.LocalProblem.requestResultCallbackAtCallSite( Inst, CalleeFun, [&](ByConstRef LeakFact) { PHASAR_LOG_LEVEL_CAT( DEBUG, LogCategory, @@ -708,7 +704,7 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { void handleSourceSinkConfig(IntermediateState &IState, DataFlowEnvironment &LocalState, FunctionId CurrFunId, n_t Inst) { - IState.LocalProblem.requestedEffect(Inst, [&](const auto &LeakFact) { + IState.LocalProblem.requestResultCallback(Inst, [&](const auto &LeakFact) { if (const auto *LeakSrc = getOrNull(LocalState, LeakFact)) { reportOrPropagateLeak(IState, CurrFunId, Inst, LeakFact, *LeakSrc); } diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index 458e652d3e..19263eb65e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -79,9 +79,11 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { void generateFacts(n_t CS, llvm::function_ref GenFact) { // XXX: Implement (was not necessary for paper eval) } - void requestedEffectAtCall(n_t CS, f_t Callee, - llvm::function_ref LeakFact); - void requestedEffect(n_t Inst, llvm::function_ref LeakFact) { + void + requestResultCallbackAtCallSite(n_t CS, f_t Callee, + llvm::function_ref LeakFact); + void requestResultCallback(n_t Inst, + llvm::function_ref LeakFact) { // XXX: Implement (was not necessary for paper eval) } void onResult(n_t Inst, d_t Fact) { diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp index 59b4da6e9e..d7b3de3b6e 100644 --- a/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp +++ 
b/lib/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.cpp @@ -286,7 +286,7 @@ void monoifds::TaintAnalysis::LocalAnalysis::generateFactsAtCall( }); } -void monoifds::TaintAnalysis::LocalAnalysis::requestedEffectAtCall( +void monoifds::TaintAnalysis::LocalAnalysis::requestResultCallbackAtCallSite( n_t CS, f_t Callee, llvm::function_ref LeakFact) { forallLeakedFacts(*TA->Config, llvm::cast(CS), Callee, LeakFact); diff --git a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp index ce01c16bf3..23d2838e3f 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp @@ -27,9 +27,9 @@ void controller::executeMonoIFDSTaint(AnalysisController &Data) { monoifds::TaintAnalysis TA(&Config, &Data.HA->getUsedGlobals(), &CAI); monoifds::MonoIFDSSolver Solver(&TA, &Data.HA->getICFG()); - auto &FC = Data.HA->getCompressedFunctions(); - const auto &SCCs = Data.HA->getCGSCCs(); - Solver.setCGSCCs(&SCCs).setFunctionCompressor(&FC); + Solver // + .setCGSCCs(&Data.HA->getCGSCCs()) + .setFunctionCompressor(&Data.HA->getCompressedFunctions()); { std::optional MeasureTime; From f831859e8eb05a94d6068b5a3fb5d4a2e1b97983 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 30 Apr 2026 11:58:20 +0200 Subject: [PATCH 16/29] domain --- include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 2a4975395d..8ae90b2d5e 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -9,8 +9,8 @@ * Fabian Schiebel and others *****************************************************************************/ +#include "phasar/DataFlow/IfdsIde/IfdsIdeDomain.h" #include 
"phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" -#include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/FunctionId.h" #include "phasar/Utils/SCCId.h" @@ -35,7 +35,7 @@ namespace psr::monoifds { /// \brief Defines requirements for a MonoIFDS-compatible analysis domain. template -concept MonoIFDSAnalysisDomain = IsAnalysisDomain; +concept MonoIFDSAnalysisDomain = IfdsAnalysisDomain; /// \brief CG-SCC-local analysis template From fb56c1226b9575fab15d4f97d2feae4bd1d97c74 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 30 Apr 2026 17:12:34 +0200 Subject: [PATCH 17/29] Some nullable-correctness in analyzeBlockImpl --- include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 14 +++++++------- include/phasar/DataFlow/MonoIfds/RPOWorkList.h | 2 +- include/phasar/Utils/FunctionId.h | 2 -- include/phasar/Utils/Nullable.h | 1 + 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 888bb2f41b..4ca73fd44b 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -435,16 +435,16 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { Nullable CurrInst = BlockStart; do { - auto Last = CurrInst; + auto Last = unwrapNullable(CurrInst); do { - analyzeInstruction(IState, LocalState, CurrFunId, - unwrapNullable(CurrInst)); - Last = CurrInst; + auto Curr = unwrapNullable(CurrInst); + analyzeInstruction(IState, LocalState, CurrFunId, Curr); + Last = Curr; if constexpr (IsBlockAwareControlFlow) { - CurrInst = ICF->getUniqueSuccessor(unwrapNullable(CurrInst)); + CurrInst = ICF->getUniqueSuccessor(Curr); } else { - const auto &Succs = ICF->getSuccsOf(unwrapNullable(CurrInst)); + const auto &Succs = ICF->getSuccsOf(Curr); if (Succs.size() == 1) { CurrInst = Succs[0]; } else { @@ -456,7 +456,7 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { Nullable 
UniqueSucc{}; // We have at least one instruction, so we can safely unwrap here - const auto &Succs = ICF->getSuccsOf(unwrapNullable(Last)); + const auto &Succs = ICF->getSuccsOf(Last); const auto SuccSz = Succs.size(); const bool HasSingleSucc = SuccSz == 1; for (const auto &Succ : Succs) { diff --git a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h index 043b7da7aa..780e343fb4 100644 --- a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h +++ b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h @@ -16,7 +16,7 @@ namespace psr::monoifds { /// See SootUp's -/// [UniversePriorityQueue](https://github.com/soot-oss/SootUp/blob/develop/sootup.analysis.intraprocedural/src/main/java/sootup/analysis/intraprocedural/UniverseSortedPriorityQueue.java) +/// [UniverseSortedPriorityQueue](https://github.com/soot-oss/SootUp/blob/develop/sootup.analysis.intraprocedural/src/main/java/sootup/analysis/intraprocedural/UniverseSortedPriorityQueue.java) template class TopoFixpointDriver { public: TopoFixpointDriver() noexcept = default; diff --git a/include/phasar/Utils/FunctionId.h b/include/phasar/Utils/FunctionId.h index ddba7b3bd0..0d63ebf6f5 100644 --- a/include/phasar/Utils/FunctionId.h +++ b/include/phasar/Utils/FunctionId.h @@ -12,8 +12,6 @@ #include "phasar/Utils/Compressor.h" #include "phasar/Utils/StrongTypeDef.h" -#include "llvm/IR/Function.h" - #include PHASAR_STRONG_TYPEDEF(psr, uint32_t, FunctionId); diff --git a/include/phasar/Utils/Nullable.h b/include/phasar/Utils/Nullable.h index b8df4c8d0b..8a96f062a0 100644 --- a/include/phasar/Utils/Nullable.h +++ b/include/phasar/Utils/Nullable.h @@ -24,6 +24,7 @@ using Nullable = template requires std::is_convertible_v [[nodiscard]] constexpr T unwrapNullable(T &&Val) noexcept { + assert(Val && "Unwrapping null-value!"); return std::forward(Val); } template From 259223bae4c4104be0c7d51e1013d7a9259e5c67 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 6 May 2026 10:48:28 +0200 
Subject: [PATCH 18/29] Integrate MonoIFDS with HelperAnalyses --- include/phasar/ControlFlow/CFG.h | 77 ++++++++++++------- include/phasar/ControlFlow/ICFG.h | 37 ++++++--- include/phasar/DataFlow/HelperAnalyses.h | 41 ++++++++++ .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 15 +++- lib/PhasarLLVM/HelperAnalyses.cpp | 13 +++- .../AnalysisControllerXMonoIFDSTaint.cpp | 10 ++- 6 files changed, 146 insertions(+), 47 deletions(-) create mode 100644 include/phasar/DataFlow/HelperAnalyses.h diff --git a/include/phasar/ControlFlow/CFG.h b/include/phasar/ControlFlow/CFG.h index 9fbecba675..2efe0cb3b1 100644 --- a/include/phasar/ControlFlow/CFG.h +++ b/include/phasar/ControlFlow/CFG.h @@ -14,13 +14,15 @@ #include "llvm/Support/raw_ostream.h" #include +#include #include namespace psr { template concept InstructionClassifier = - requires(const T &IC, typename T::n_t Inst, typename T::n_t Succ) { + requires(const T &IC, typename std::remove_cvref_t::n_t Inst, + typename std::remove_cvref_t::n_t Succ) { { IC.isCallSite(Inst) } -> std::convertible_to; { IC.isFieldLoad(Inst) } -> std::convertible_to; { IC.isFieldStore(Inst) } -> std::convertible_to; @@ -29,27 +31,36 @@ concept InstructionClassifier = }; template -concept CFG = requires(const T &CF, typename T::n_t Inst, typename T::f_t Fun) { - typename T::n_t; - typename T::f_t; +concept CFG = requires(const T &CF, typename std::remove_cvref_t::n_t Inst, + typename std::remove_cvref_t::f_t Fun) { + typename std::remove_cvref_t::n_t; + typename std::remove_cvref_t::f_t; /// Returns the function that contains the given instruction Inst. // TODO: Actually belongs into ProjectIRDB! - { CF.getFunctionOf(Inst) } -> std::convertible_to; + { + CF.getFunctionOf(Inst) + } -> std::convertible_to::f_t>; /// Returns an iterable range of all instructions of the given function that /// are part of the control-flow graph. // TODO: We should have sth like this in the ProjectIRDB as well! 
- { CF.getAllInstructionsOf(Fun) } -> psr::is_iterable_over_v; + { + CF.getAllInstructionsOf(Fun) + } -> psr::is_iterable_over_v::n_t>; /// Returns an iterable range of all successor instructions of Inst in the /// CFG. /// NOTE: This function is typically being called in a hot part of the /// analysis and should therefore be highly optimized for performance. - { CF.getSuccsOf(Inst) } -> psr::is_iterable_over_v; + { + CF.getSuccsOf(Inst) + } -> psr::is_iterable_over_v::n_t>; /// Returns an iterable range of all starting instructions of the given /// function. For a forward-CFG, this is typically a singleton range. - { CF.getStartPointsOf(Fun) } -> psr::is_iterable_over_v; + { + CF.getStartPointsOf(Fun) + } -> psr::is_iterable_over_v::n_t>; /// Returns whether the given Inst is a root node of the CFG { CF.isStartPoint(Inst) } -> std::convertible_to; @@ -61,43 +72,53 @@ concept CFG = requires(const T &CF, typename T::n_t Inst, typename T::f_t Fun) { }; template -concept CFGOf = CFG && std::same_as && - std::same_as; +concept CFGOf = + CFG && std::same_as::n_t> && + std::same_as::f_t>; template concept BidiCFG = - CFG && requires(const T &CF, typename T::n_t Inst, typename T::f_t Fun) { + CFG && requires(const T &CF, typename std::remove_cvref_t::n_t Inst, + typename std::remove_cvref_t::f_t Fun) { /// Returns an iterable range of all predecessor instructions of Inst in /// the CFG - { CF.getPredsOf(Inst) } -> psr::is_iterable_over_v; + { + CF.getPredsOf(Inst) + } -> psr::is_iterable_over_v::n_t>; /// Returns an iterable range of all exit instructions (often return /// instructions) of the given function. 
For a backward-CFG, this is /// typically a singleton range - { CF.getExitPointsOf(Fun) } -> psr::is_iterable_over_v; + { + CF.getExitPointsOf(Fun) + } -> psr::is_iterable_over_v::n_t>; }; template -concept CFGDump = requires(const T &CF, typename T::n_t Inst, - typename T::f_t Fun, llvm::raw_ostream &OS) { - { CF.getStatementId(Inst) } -> psr::is_string_like_v; - { CF.getFunctionName(Fun) } -> psr::is_string_like_v; - { CF.getDemangledFunctionName(Fun) } -> psr::is_string_like_v; - CF.print(Fun, OS); -}; +concept CFGDump = + requires(const T &CF, typename std::remove_cvref_t::n_t Inst, + typename std::remove_cvref_t::f_t Fun, llvm::raw_ostream &OS) { + { CF.getStatementId(Inst) } -> psr::is_string_like_v; + { CF.getFunctionName(Fun) } -> psr::is_string_like_v; + { CF.getDemangledFunctionName(Fun) } -> psr::is_string_like_v; + CF.print(Fun, OS); + }; template -concept CFGEdgesProvider = requires(const T &CF, typename T::f_t Fun) { +concept CFGEdgesProvider = requires(const T &CF, + typename std::remove_cvref_t::f_t Fun) { { CF.getAllControlFlowEdges(Fun) - } -> psr::is_iterable_over_v>; + } -> psr::is_iterable_over_v::n_t, + typename std::remove_cvref_t::n_t>>; }; template -concept IsBlockAwareControlFlow = requires(const T &CF, typename T::n_t Inst) { - { - CF.getUniqueSuccessor(Inst) - } -> std::convertible_to>; - { CF.hasUniquePredecessor(Inst) } -> std::convertible_to; -}; +concept IsBlockAwareControlFlow = + requires(const T &CF, typename std::remove_cvref_t::n_t Inst) { + { + CF.getUniqueSuccessor(Inst) + } -> std::convertible_to::n_t>>; + { CF.hasUniquePredecessor(Inst) } -> std::convertible_to; + }; } // namespace psr diff --git a/include/phasar/ControlFlow/ICFG.h b/include/phasar/ControlFlow/ICFG.h index d6129728a0..e4697e74f6 100644 --- a/include/phasar/ControlFlow/ICFG.h +++ b/include/phasar/ControlFlow/ICFG.h @@ -17,18 +17,24 @@ #include "llvm/Support/raw_ostream.h" #include +#include namespace psr { template concept ICFG = CFG && requires(const T &ICF, 
llvm::StringRef Name, - typename T::n_t Inst, typename T::f_t Fun) { - typename T::f_t; - typename T::n_t; + typename std::remove_cvref_t::n_t Inst, + typename std::remove_cvref_t::f_t Fun) { + typename std::remove_cvref_t::f_t; + typename std::remove_cvref_t::n_t; // TODO: Should not be duplicated with ProjectIRDB - { ICF.getAllFunctions() } -> is_iterable_over_v; + { + ICF.getAllFunctions() + } -> is_iterable_over_v::f_t>; // TODO: Should not be duplicated with ProjectIRDB - { ICF.getFunction(Name) } -> std::convertible_to>; + { + ICF.getFunction(Name) + } -> std::convertible_to::f_t>>; { ICF.isIndirectFunctionCall(Inst) } -> std::convertible_to; { ICF.isVirtualFunctionCall(Inst) } -> std::convertible_to; @@ -38,19 +44,27 @@ concept ICFG = CFG && requires(const T &ICF, llvm::StringRef Name, /// Returns an iterable range of all possible callee candidates at the given /// call-site induced by the used call-graph. Same as /// getCallGraph().getCalleesOfCallAt(Inst) - { ICF.getCalleesOfCallAt(Inst) } -> psr::is_iterable_over_v; + { + ICF.getCalleesOfCallAt(Inst) + } -> psr::is_iterable_over_v::f_t>; /// Returns an iterable range of all possible call-site candidates that may /// call the given function induced by the used call-graph. Same as /// getCallGraph().getCallersOf(Fun) - { ICF.getCallersOf(Fun) } -> psr::is_iterable_over_v; + { + ICF.getCallersOf(Fun) + } -> psr::is_iterable_over_v::n_t>; /// Returns an iterable range of all call-instruction in the given function - { ICF.getCallsFromWithin(Fun) } -> psr::is_iterable_over_v; + { + ICF.getCallsFromWithin(Fun) + } -> psr::is_iterable_over_v::n_t>; /// Returns an iterable range of all instructions in all functions of the ICFG /// that are neither call-sites nor start-points of a function // TODO: Get rid of this function - { ICF.allNonCallStartNodes() } -> psr::is_iterable_over_v; + { + ICF.allNonCallStartNodes() + } -> psr::is_iterable_over_v::n_t>; /// The total number of call-sites in the ICFG. 
Same as /// getCallGraph().getNumVertexCallSites() @@ -64,6 +78,7 @@ concept ICFGDump = requires(const T &ICF, llvm::raw_ostream &OS) { }; template -concept ICFGOf = ICFG && std::same_as && - std::same_as; +concept ICFGOf = + ICFG && std::same_as::n_t> && + std::same_as::f_t>; } // namespace psr diff --git a/include/phasar/DataFlow/HelperAnalyses.h b/include/phasar/DataFlow/HelperAnalyses.h new file mode 100644 index 0000000000..cba3e2582d --- /dev/null +++ b/include/phasar/DataFlow/HelperAnalyses.h @@ -0,0 +1,41 @@ +#pragma once + +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/ControlFlow/ICFG.h" +#include "phasar/Utils/FunctionId.h" +#include "phasar/Utils/SCCGeneric.h" + +#include + +namespace psr { +template +concept CanGetICFG = requires(T &HA) { + { HA.getICFG() } -> ICFG; +}; + +template +concept CanGetICFGOf = requires(T &HA) { + { HA.getICFG() } -> ICFGOf; +}; + +template +concept CanGetCompressedFunctionsOf = requires(T &HA) { + { + HA.getCompressedFunctions() + } -> std::convertible_to &>; +}; + +template +concept CanGetCGSCCs = requires(T &HA) { + { &HA.getCGSCCs() } -> std::convertible_to *>; +}; + +} // namespace psr diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index 4ca73fd44b..cacdeb8cfe 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -13,6 +13,7 @@ #include "phasar/ControlFlow/CGSCCs.h" #include "phasar/ControlFlow/ControlFlowOrder.h" #include "phasar/ControlFlow/ICFG.h" +#include "phasar/DataFlow/HelperAnalyses.h" #include 
"phasar/DataFlow/MonoIfds/ArraySetWorkList.h" #include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" #include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" @@ -39,7 +40,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/TypeName.h" -#include #include #include #include @@ -71,6 +71,19 @@ class MonoIFDSSolver : public MonoIFDFSSolverBase { : Problem(&assertNotNull(Problem)), ICF(&assertNotNull(ICF)), MBufRes(Alloc.resource()) {} + template HelperAnalysesT> + explicit MonoIFDSSolver(ProblemT *Problem, HelperAnalysesT &HA, + std::pmr::polymorphic_allocator<> Alloc = + std::pmr::get_default_resource()) + : MonoIFDSSolver(Problem, &HA.getICFG(), Alloc) { + if constexpr (CanGetCompressedFunctionsOf) { + setFunctionCompressor(&HA.getCompressedFunctions()); + } + if constexpr (CanGetCGSCCs) { + setCGSCCs(&HA.getCGSCCs()); + } + } + MonoIFDSSolver &setConfig(MonoIfdsConfig Config) & noexcept { this->Config = Config; return *this; diff --git a/lib/PhasarLLVM/HelperAnalyses.cpp b/lib/PhasarLLVM/HelperAnalyses.cpp index 3f1041a4f5..2ade892838 100644 --- a/lib/PhasarLLVM/HelperAnalyses.cpp +++ b/lib/PhasarLLVM/HelperAnalyses.cpp @@ -1,6 +1,7 @@ #include "phasar/PhasarLLVM/HelperAnalyses.h" #include "phasar/ControlFlow/CGSCCs.h" +#include "phasar/DataFlow/HelperAnalyses.h" #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" #include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCallGraphBuilder.h" @@ -17,7 +18,15 @@ #include #include -namespace psr { +using namespace psr; + +static_assert(CanGetICFG); +static_assert(CanGetICFGOf); +static_assert( + CanGetCompressedFunctionsOf); +static_assert(CanGetCGSCCs); + HelperAnalyses::HelperAnalyses(std::string IRFile, std::optional PrecomputedPTS, AliasAnalysisType PTATy, bool AllowLazyPTS, @@ -175,5 +184,3 @@ HelperAnalyses::getUsedGlobals() { } return *UsedGlobals; } - -} // namespace psr diff --git 
a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp index 23d2838e3f..64b7caee24 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp @@ -26,10 +26,12 @@ void controller::executeMonoIFDSTaint(AnalysisController &Data) { auto Config = makeTaintConfig(Data); monoifds::TaintAnalysis TA(&Config, &Data.HA->getUsedGlobals(), &CAI); - monoifds::MonoIFDSSolver Solver(&TA, &Data.HA->getICFG()); - Solver // - .setCGSCCs(&Data.HA->getCGSCCs()) - .setFunctionCompressor(&Data.HA->getCompressedFunctions()); + // monoifds::MonoIFDSSolver Solver(&TA, &Data.HA->getICFG()); + // Solver // + // .setCGSCCs(&Data.HA->getCGSCCs()) + // .setFunctionCompressor(&Data.HA->getCompressedFunctions()); + + monoifds::MonoIFDSSolver Solver(&TA, *Data.HA); { std::optional MeasureTime; From 4a9cbe7829ccb9bd5bcbc9fbc06217c124ecc803 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 6 May 2026 15:50:21 +0200 Subject: [PATCH 19/29] Use llvm::SmallBitVector in DataFlowEnvironment to match behavior presented in paper --- include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h index f8103e0a44..b641ade984 100644 --- a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h +++ b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h @@ -18,7 +18,7 @@ PHASAR_STRONG_TYPEDEF(psr::monoifds, uint32_t, SourceFactId); namespace psr::monoifds { -using SourceFactSet = BitSet; +using SourceFactSet = BitSet; /// The local analysis state: TargetFact-->{SourceFact} /// From 2a6528bc63d054d5f2e55f7626866bb4ad03bcec Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 11 May 2026 16:55:23 +0200 Subject: [PATCH 20/29] minor stuff + some general changes that
should go into their own PR (aliasinfo -> aliasiterator conversion) --- include/phasar/DataFlow/IfdsIde/IfdsIdeDomain.h | 2 +- include/phasar/Domain/AnalysisDomain.h | 4 ++-- .../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 11 +++++++++-- include/phasar/Pointer/AliasInfo.h | 4 +++- .../Controller/AnalysisControllerXMonoIFDSTaint.cpp | 7 ++----- unittests/PhasarLLVM/Pointer/LLVMAliasSetTest.cpp | 4 ++++ 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/IfdsIdeDomain.h b/include/phasar/DataFlow/IfdsIde/IfdsIdeDomain.h index bb48c770f7..748a8a5360 100644 --- a/include/phasar/DataFlow/IfdsIde/IfdsIdeDomain.h +++ b/include/phasar/DataFlow/IfdsIde/IfdsIdeDomain.h @@ -33,7 +33,7 @@ concept IfdsAnalysisDomain = IsAnalysisDomain && requires() { typename T::i_t; requires IsDataFlowFact; - requires ICFG; + // requires ICFG; }; template diff --git a/include/phasar/Domain/AnalysisDomain.h b/include/phasar/Domain/AnalysisDomain.h index be0163e89b..03da68b9f6 100644 --- a/include/phasar/Domain/AnalysisDomain.h +++ b/include/phasar/Domain/AnalysisDomain.h @@ -29,8 +29,8 @@ concept IsAnalysisDomain = IRDomain && requires() { requires std::same_as; requires std::same_as; - typename T::c_t; - requires CFG; + // typename T::c_t; + // requires CFG; }; /// AnalysisDomain - This class should be specialized by different static diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index 19263eb65e..558c00aae1 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -10,6 +10,8 @@ *****************************************************************************/ #include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSProblem.h" +#include 
"phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" // for concept checking #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" @@ -30,6 +32,9 @@ #include namespace psr::monoifds { + +/// Implementation of a generic taint analysis to be solved by the +/// MonoIFDSSolver. Conforms to the MonoIFDSProblem concept. class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { public: using ProblemAnalysisDomain = LLVMIFDSAnalysisDomainDefault; @@ -39,7 +44,9 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { const UsedGlobalsHolder *UsedGlobals, LLVMAliasIteratorRef AI) : Config(&assertNotNull(Config)), - UsedGlobals(&assertNotNull(UsedGlobals)), AI(AI) {} + UsedGlobals(&assertNotNull(UsedGlobals)), AI(AI) { + static_assert(MonoIFDSProblem); + } void setAnalysisPrinter( MaybeUniquePtr> P) { @@ -111,7 +118,7 @@ class TaintAnalysis : public LLVMIFDSAnalysisDomainDefault { Printer->onFinalize(OS); } - // Optional API function: Filter out facts that are do not need to go into a + // Optional API function: Filter out facts that do not need to go into a // procedure summary [[nodiscard]] bool shouldBeInSummary(d_t ExitFact, n_t ExitInst); diff --git a/include/phasar/Pointer/AliasInfo.h b/include/phasar/Pointer/AliasInfo.h index 67fecbc82d..ed9a531445 100644 --- a/include/phasar/Pointer/AliasInfo.h +++ b/include/phasar/Pointer/AliasInfo.h @@ -96,7 +96,6 @@ class [[gsl::Pointer]] AliasInfoRef assert(VT != nullptr); return AliasIteratorRef(AA, VT->ForallAliasesOf); } - constexpr operator AliasIteratorRef() && noexcept = delete; // -- Impl for IsAliasInfo: @@ -359,6 +358,9 @@ class [[clang::trivial_abi, gsl::Owner]] AliasInfo final [[nodiscard]] base_t get() & noexcept { return asRef(); } [[nodiscard]] AliasInfoRef get() const & noexcept { return asRef(); } [[nodiscard]] AliasInfoRef get() && = delete; + + using base_t::operator AliasIteratorRef; + 
constexpr operator AliasIteratorRef() && noexcept = delete; }; extern template class AliasInfoRefgetAliasInfo(); - FilteredLLVMAliasIterator FAI(AI); - CachedLLVMAliasIterator CAI(&FAI); + FilteredLLVMAliasIterator FAI(Data.HA->getAliasInfo()); auto Config = makeTaintConfig(Data); - monoifds::TaintAnalysis TA(&Config, &Data.HA->getUsedGlobals(), &CAI); + monoifds::TaintAnalysis TA(&Config, &Data.HA->getUsedGlobals(), &FAI); // monoifds::MonoIFDSSolver Solver(&TA, &Data.HA->getICFG()); // Solver // diff --git a/unittests/PhasarLLVM/Pointer/LLVMAliasSetTest.cpp b/unittests/PhasarLLVM/Pointer/LLVMAliasSetTest.cpp index 88e9abcb44..ed8b5aba0f 100644 --- a/unittests/PhasarLLVM/Pointer/LLVMAliasSetTest.cpp +++ b/unittests/PhasarLLVM/Pointer/LLVMAliasSetTest.cpp @@ -73,6 +73,10 @@ TEST(LLVMAliasSet, Global_01) { static_assert(std::is_convertible_v); static_assert(std::is_convertible_v); static_assert(std::is_convertible_v); +static_assert(std::is_convertible_v); +static_assert(!std::is_convertible_v, + "Prevent dangling references"); +static_assert(std::is_convertible_v); static_assert(std::is_convertible_v); int main(int Argc, char **Argv) { From c7f609a8e9f5a2fcb9798530a0091895ac8423da Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 18:12:33 +0200 Subject: [PATCH 21/29] As discussed, remove Eric from file headers --- include/phasar/ControlFlow/CGSCCs.h | 2 +- include/phasar/ControlFlow/ControlFlowOrder.h | 2 +- include/phasar/DataFlow/HelperAnalyses.h | 2 +- .../DataFlow/MonoIfds/ArraySetWorkList.h | 2 +- .../DataFlow/MonoIfds/DataFlowEnvironment.h | 2 +- .../DataFlow/MonoIfds/IterationStrategy.h | 2 +- .../phasar/DataFlow/MonoIfds/MonoIFDSConfig.h | 2 +- .../DataFlow/MonoIfds/MonoIFDSProblem.h | 198 +++++++++--------- .../phasar/DataFlow/MonoIfds/MonoIFDSSolver.h | 2 +- .../phasar/DataFlow/MonoIfds/RPOWorkList.h | 2 +- .../ControlFlow/FunctionCompressor.h | 4 +- .../PhasarLLVM/DataFlow/MonoIfds/AliasCache.h | 2 +- 
.../MonoIfds/Problems/MonoIFDSTaintAnalysis.h | 2 +- include/phasar/PhasarLLVM/Utils/UsedGlobals.h | 2 +- include/phasar/Utils/FunctionId.h | 2 +- include/phasar/Utils/Lazy.h | 2 +- include/phasar/Utils/UsedGlobalsHolder.h | 2 +- .../AnalysisControllerXMonoIFDSTaint.cpp | 2 +- 18 files changed, 115 insertions(+), 119 deletions(-) diff --git a/include/phasar/ControlFlow/CGSCCs.h b/include/phasar/ControlFlow/CGSCCs.h index b7061de4b9..5afe66dc53 100644 --- a/include/phasar/ControlFlow/CGSCCs.h +++ b/include/phasar/ControlFlow/CGSCCs.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/ControlFlow/ControlFlowOrder.h b/include/phasar/ControlFlow/ControlFlowOrder.h index 560ccbf6d9..06751b1471 100644 --- a/include/phasar/ControlFlow/ControlFlowOrder.h +++ b/include/phasar/ControlFlow/ControlFlowOrder.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/DataFlow/HelperAnalyses.h b/include/phasar/DataFlow/HelperAnalyses.h index cba3e2582d..2efe94a7d4 100644 --- a/include/phasar/DataFlow/HelperAnalyses.h +++ b/include/phasar/DataFlow/HelperAnalyses.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. 
* diff --git a/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h b/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h index b78a8ea488..a2307d05f4 100644 --- a/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h +++ b/include/phasar/DataFlow/MonoIfds/ArraySetWorkList.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h index b641ade984..b32c7091a7 100644 --- a/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h +++ b/include/phasar/DataFlow/MonoIfds/DataFlowEnvironment.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/DataFlow/MonoIfds/IterationStrategy.h b/include/phasar/DataFlow/MonoIfds/IterationStrategy.h index 51c5109b1f..8fab199d90 100644 --- a/include/phasar/DataFlow/MonoIfds/IterationStrategy.h +++ b/include/phasar/DataFlow/MonoIfds/IterationStrategy.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. 
* diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h index a8aef1e706..93297d4c8d 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSConfig.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 8ae90b2d5e..96909d551e 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * @@ -39,106 +39,102 @@ concept MonoIFDSAnalysisDomain = IfdsAnalysisDomain; /// \brief CG-SCC-local analysis template -concept LocalMonoIFDSProblem = - requires(T &Problem, DataFlowEnvironment &InOut, - const DataFlowEnvironment &In, - typename Dom::n_t Inst, const typename Dom::n_t &Fact, - const typename Dom::f_t &Fun, - Compressor &SeedCompressor) { - /// Intra-procedural data-flow. Input facts are passed-in as InOut; - /// modifications are performed in-place. - /// - /// Corresponds to the $flow()$ function in the paper. - Problem.normalFlow(InOut, Inst); - - /// Intra-procedural data-flow at call-sites. Input facts are passed-in as - /// InOut; modifications are performed in-place. Kills facts that may be - /// strongly updated by the callee. Don't use it to *generate* facts. 
- /// - /// Corresponds to the $callFlow()$ function in the paper. - Problem.callToRetFlow(InOut, Inst); - - /// Inter-procedural data-flow at exit-statements; Maps callee-facts back - /// to the return-site in the caller. As with normal IFDS, this function - /// will be called for each incoming Fact, that should be mapped back; - /// return-site facts are returned by this function. - /// - /// Corresponds to the $returnVal()$ function in the paper. - { - Problem.returnFlow(Inst, Fact) - } -> psr::is_iterable_over_v; - - /// Inter-procedural data-flow at entry-statements; Maps - /// callee-source-facts back to the call-site in the caller. This function - /// will be called for each source Fact; call-site facts are returned by - /// this function. - /// - /// Corresponds to the $passArgs^{-1}()$ function in the paper. - { - Problem.invCallFlow(Inst, Fact) - } -> psr::is_iterable_over_v; - - /// Applies a pre-computed summary of Fun at Inst into InOut, if - /// applicable. - /// - /// Useful for pre-known taint-propagators and declaration-only library - /// functions. - /// - /// \returns True, iff a summary was applied. This will take precedence - /// over a summary that the solver may have computed for Fun! - { - Problem.summaryFlow(In, InOut, Inst, Fun) - } -> std::convertible_to; - - /// The special zero-value, aka. $\Lambda$. Always holds. Facts that are - /// generated unconditionally, originate from zero. - /// - /// Note: The solver guarantees that the zero-value always has - /// SourceFactId 0. - { Problem.getZeroValue() } -> std::convertible_to; - - /// Approximates the source-facts that should hold at the entry of Fun. - /// Input the facts in the InOut map as - /// `SeedState[Fact].insert(SeedCompressor.getOrInsert(Fact))` - /// - /// Note: This is assumed to be a (conservative) over-approximation! 
- Problem.initialSeeds(InOut, SeedCompressor, Fun); - - /// At a call-site Inst calling Fun, invokes the given callback for each - /// fact that should be generated from zero there. - /// - /// Useful for taint sources. - Problem.generateFactsAtCall(Inst, Fun, - [](const typename Dom::d_t & GenFact) {}); - - /// At a non-call-site Inst, invokes the given callback for each - /// fact that should be generated from zero there. - /// - /// Useful for taint sources. - Problem.generateFacts(Inst, [](const typename Dom::d_t & GenFact) {}); - - /// Invokes the given callback for each LeakFact for which the solver - /// should later call onResult(Inst, LeakFact), if LeakFacts holds at - /// Inst. Here, Inst is assumed to be a call-site that may call Fun. - /// - /// Useful for taint sinks. - Problem.requestResultCallbackAtCallSite( - Inst, Fun, [](const typename Dom::d_t & LeakFact) {}); - - /// Invokes the given callback for each LeakFact for which the solver - /// should later call onResult(Inst, LeakFact), if LeakFacts holds at - /// Inst. - /// - /// Useful for taint sinks. - Problem.requestResultCallback(Inst, - [](const typename Dom::d_t & LeakFact) {}); - - /// Notifies the problem that a previously requested leak-Fact now is - /// known to hold at Inst. - /// - /// Useful for reporting taint leaks. - Problem.onResult(Inst, Fact); - }; +concept LocalMonoIFDSProblem = requires( + T &Problem, DataFlowEnvironment &InOut, + const DataFlowEnvironment &In, typename Dom::n_t Inst, + const typename Dom::n_t &Fact, const typename Dom::f_t &Fun, + Compressor &SeedCompressor) { + /// Intra-procedural data-flow. Input facts are passed-in as InOut; + /// modifications are performed in-place. + /// + /// Corresponds to the $flow()$ function in the paper. + Problem.normalFlow(InOut, Inst); + + /// Intra-procedural data-flow at call-sites. Input facts are passed-in as + /// InOut; modifications are performed in-place. Kills facts that may be + /// strongly updated by the callee. 
Don't use it to *generate* facts. + /// + /// Corresponds to the $callFlow()$ function in the paper. + Problem.callToRetFlow(InOut, Inst); + + /// Inter-procedural data-flow at exit-statements; Maps callee-facts back + /// to the return-site in the caller. As with normal IFDS, this function + /// will be called for each incoming Fact, that should be mapped back; + /// return-site facts are returned by this function. + /// + /// Corresponds to the $returnVal()$ function in the paper. + { + Problem.returnFlow(Inst, Fact) + } -> psr::is_iterable_over_v; + + /// Inter-procedural data-flow at entry-statements; Maps + /// callee-source-facts back to the call-site in the caller. This function + /// will be called for each source Fact; call-site facts are returned by + /// this function. + /// + /// Corresponds to the $passArgs^{-1}()$ function in the paper. + { + Problem.invCallFlow(Inst, Fact) + } -> psr::is_iterable_over_v; + + /// Applies a pre-computed summary of Fun at Inst into InOut, if + /// applicable. + /// + /// Useful for pre-known taint-propagators and declaration-only library + /// functions. + /// + /// \returns True, iff a summary was applied. This will take precedence + /// over a summary that the solver may have computed for Fun! + { Problem.summaryFlow(In, InOut, Inst, Fun) } -> std::convertible_to; + + /// The special zero-value, aka. $\Lambda$. Always holds. Facts that are + /// generated unconditionally, originate from zero. + /// + /// Note: The solver guarantees that the zero-value always has + /// SourceFactId 0. + { Problem.getZeroValue() } -> std::convertible_to; + + /// Approximates the source-facts that should hold at the entry of Fun. + /// Input the facts in the InOut map as + /// `SeedState[Fact].insert(SeedCompressor.getOrInsert(Fact))` + /// + /// Note: This is assumed to be a (conservative) over-approximation! 
+ Problem.initialSeeds(InOut, SeedCompressor, Fun); + + /// At a call-site Inst calling Fun, invokes the given callback for each + /// fact that should be generated from zero there. + /// + /// Useful for taint sources. + Problem.generateFactsAtCall(Inst, Fun, + [](const typename Dom::d_t &GenFact) {}); + + /// At a non-call-site Inst, invokes the given callback for each + /// fact that should be generated from zero there. + /// + /// Useful for taint sources. + Problem.generateFacts(Inst, [](const typename Dom::d_t &GenFact) {}); + + /// Invokes the given callback for each LeakFact for which the solver + /// should later call onResult(Inst, LeakFact), if LeakFacts holds at + /// Inst. Here, Inst is assumed to be a call-site that may call Fun. + /// + /// Useful for taint sinks. + Problem.requestResultCallbackAtCallSite( + Inst, Fun, [](const typename Dom::d_t &LeakFact) {}); + + /// Invokes the given callback for each LeakFact for which the solver + /// should later call onResult(Inst, LeakFact), if LeakFacts holds at + /// Inst. + /// + /// Useful for taint sinks. + Problem.requestResultCallback(Inst, [](const typename Dom::d_t &LeakFact) {}); + + /// Notifies the problem that a previously requested leak-Fact now is + /// known to hold at Inst. + /// + /// Useful for reporting taint leaks. + Problem.onResult(Inst, Fact); +}; /// \brief Defines requirements for an analysis problem that can be solved by /// the MonoIFDSSolver. diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h index cacdeb8cfe..3dc105b3c1 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSSolver.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. 
This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h index 780e343fb4..3ee968158f 100644 --- a/include/phasar/DataFlow/MonoIfds/RPOWorkList.h +++ b/include/phasar/DataFlow/MonoIfds/RPOWorkList.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h b/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h index 6d2c488885..c104fa187b 100644 --- a/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h +++ b/include/phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * @@ -17,4 +17,4 @@ namespace psr { Compressor compressFunctions(const LLVMBasedCallGraph &CG, llvm::ArrayRef EntryPoints); -} // namespace psr \ No newline at end of file +} // namespace psr diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h index 660a86f077..6ce02ccbe8 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. 
* All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h index 558c00aae1..a786668d69 100644 --- a/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h index ac6b7fa4aa..00f3fcfe94 100644 --- a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h +++ b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/Utils/FunctionId.h b/include/phasar/Utils/FunctionId.h index 0d63ebf6f5..15404a8e70 100644 --- a/include/phasar/Utils/FunctionId.h +++ b/include/phasar/Utils/FunctionId.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. 
* diff --git a/include/phasar/Utils/Lazy.h b/include/phasar/Utils/Lazy.h index 519003ef43..6abe6d7a95 100644 --- a/include/phasar/Utils/Lazy.h +++ b/include/phasar/Utils/Lazy.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/include/phasar/Utils/UsedGlobalsHolder.h b/include/phasar/Utils/UsedGlobalsHolder.h index d6e311a0c0..fc50bd3c26 100644 --- a/include/phasar/Utils/UsedGlobalsHolder.h +++ b/include/phasar/Utils/UsedGlobalsHolder.h @@ -1,7 +1,7 @@ #pragma once /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. * diff --git a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp index c1b99844bc..52d5138ead 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXMonoIFDSTaint.cpp @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright (c) 2026 Fabian Schiebel, Eric Bodden. + * Copyright (c) 2026 Fabian Schiebel. * All rights reserved. This program and the accompanying materials are made * available under the terms of LICENSE.txt. 
* From fafc6c2f5bc7422a7740c7ee82fb4cd47329f529 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 18:59:39 +0200 Subject: [PATCH 22/29] Add example, how to use MonoIFDS --- .../06-run-monoifds-analysis/CMakeLists.txt | 25 ++++++ .../how-to/06-run-monoifds-analysis/README.md | 30 ++++++++ .../helper-analyses.cpp | 51 +++++++++++++ .../06-run-monoifds-analysis/manual.cpp | 76 +++++++++++++++++++ include/phasar/DataFlow.h | 7 ++ include/phasar/PhasarLLVM/ControlFlow.h | 1 + include/phasar/PhasarLLVM/DataFlow.h | 2 + include/phasar/PhasarLLVM/Utils.h | 1 + 8 files changed, 193 insertions(+) create mode 100644 examples/how-to/06-run-monoifds-analysis/CMakeLists.txt create mode 100644 examples/how-to/06-run-monoifds-analysis/README.md create mode 100644 examples/how-to/06-run-monoifds-analysis/helper-analyses.cpp create mode 100644 examples/how-to/06-run-monoifds-analysis/manual.cpp diff --git a/examples/how-to/06-run-monoifds-analysis/CMakeLists.txt b/examples/how-to/06-run-monoifds-analysis/CMakeLists.txt new file mode 100644 index 0000000000..87651cc8b3 --- /dev/null +++ b/examples/how-to/06-run-monoifds-analysis/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.14...3.28) + +project(run-monoifds-analysis) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +find_package(phasar REQUIRED CONFIG) + + +add_executable(run-monoifds-analysis-helper-analyses helper-analyses.cpp) +target_link_libraries(run-monoifds-analysis-helper-analyses PRIVATE phasar::phasar) + +add_executable(run-monoifds-analysis-manual manual.cpp) +target_link_libraries(run-monoifds-analysis-manual PRIVATE phasar::phasar) + + +if (TARGET run_sample_programs) + add_custom_target(run_run_monoifds_analysis + DEPENDS run-monoifds-analysis-helper-analyses run-monoifds-analysis-manual LLFileGeneration + COMMAND $ "${CMAKE_CURRENT_BINARY_DIR}/../llvm-hello-world/target/taint_cpp_dbg.ll" + COMMAND $ "${CMAKE_CURRENT_BINARY_DIR}/../llvm-hello-world/target/taint_cpp_dbg.ll" + ) + + 
add_dependencies(run_sample_programs run_run_monoifds_analysis) +endif() diff --git a/examples/how-to/06-run-monoifds-analysis/README.md b/examples/how-to/06-run-monoifds-analysis/README.md new file mode 100644 index 0000000000..fb5fcf0232 --- /dev/null +++ b/examples/how-to/06-run-monoifds-analysis/README.md @@ -0,0 +1,30 @@ +# Run a MonoIFDS Analysis + +Shows how you can use PhASAR to run an already existing MonoIFDS analysis on an LLVM IR module. +For this example, we selected the `MonoIFDSTaintAnalysis`. + +You may look at the different C++ source files to see how you can run a MonoIFDS taint analysis using PhASAR. +We suggest starting with the simplest example [helper-analyses.cpp](./helper-analyses.cpp). + +## Build + +This example program can be built using cmake. +It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. + +```bash +# Invoked from the 04-run-ifds-analysis root folder: +$ mkdir -p build && cd build +$ cmake .. +$ cmake --build . +``` + +## Test + +You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/).
+ +```bash +# Invoked from the 04-run-ifds-analysis/build folder: +./run-monoifds-analysis-helper-analyses ../../../llvm-hello-world/target/taint.ll + +./run-monoifds-analysis-manual ../../../llvm-hello-world/target/taint.ll +``` diff --git a/examples/how-to/06-run-monoifds-analysis/helper-analyses.cpp b/examples/how-to/06-run-monoifds-analysis/helper-analyses.cpp new file mode 100644 index 0000000000..2449037199 --- /dev/null +++ b/examples/how-to/06-run-monoifds-analysis/helper-analyses.cpp @@ -0,0 +1,51 @@ +#include "phasar/DataFlow.h" // For MonoIFDSSolver +#include "phasar/PhasarLLVM.h" // For the HelperAnalyses +#include "phasar/PhasarLLVM/DataFlow.h" // For the MonoIFDSTaintAnalysis +#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasIterator.h" +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() + << "USAGE: run-monoifds-analysis-helper-analyses \n"; + return 1; + } + + using namespace std::string_literals; + std::vector EntryPoints = {"main"s}; + + // Instead of creating all the helper analyses ourselves, we can just use the + // HelperAnalyses class. It will create the necessary information on-demand. + // + // You can customize the underlying algorithms by passing a + // HelperAnalysisConfig as third parameter + psr::HelperAnalyses HA(Argv[1], EntryPoints); + if (!HA.getProjectIRDB()) { + return 1; + } + + // Create the taint configuration + psr::LLVMTaintConfig TC(HA.getProjectIRDB()); + TC.print(); + llvm::outs() << "------------------------\n"; + + // More precise alias-information; technically, this is not required, but it + // helps a lot + psr::FilteredLLVMAliasIterator FAI(HA.getAliasInfo()); + + // Create the taint analysis problem: + psr::monoifds::TaintAnalysis TaintProblem(&TC, &HA.getUsedGlobals(), &FAI); + + // To solve the taint problem, we now create an instance of the + // MonoIFDSSolver.
Passing the HelperAnalyses here, lets the solver + // automatically grab the needed information + psr::monoifds::MonoIFDSSolver Solver(&TaintProblem, HA); + + // Solves the taint problem. This may take some time. + Solver.solve(); + + // The monoifds::TaintAnalysis is set-up to use the analysis-printer (see + // ../04-run-ifds-analysis/otf-reporter.cpp). By default, it prints the + // detected leaks into the given llvm::raw_ostream + TaintProblem.emitTextReport(llvm::outs()); +} diff --git a/examples/how-to/06-run-monoifds-analysis/manual.cpp b/examples/how-to/06-run-monoifds-analysis/manual.cpp new file mode 100644 index 0000000000..e6986f7053 --- /dev/null +++ b/examples/how-to/06-run-monoifds-analysis/manual.cpp @@ -0,0 +1,76 @@ +#include "phasar/DataFlow.h" // For MonoIFDSSolver +#include "phasar/PhasarLLVM/ControlFlow.h" // For FunctionCompressor & getEntryFunctions +#include "phasar/PhasarLLVM/DataFlow.h" // For the MonoIFDSTaintAnalysis +#include "phasar/PhasarLLVM/Pointer.h" // For the LLVMAliasSet +#include "phasar/PhasarLLVM/TaintConfig.h" // For the LLVMTaintConfig +#include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" + +int main(int Argc, char *Argv[]) { + if (Argc < 2) { + llvm::errs() << "USAGE: run-monoifds-analysis-manual \n"; + return 1; + } + + using namespace std::string_literals; + std::vector EntryPoints = {"main"s}; + + // Load the IR + auto IRDB = psr::LLVMProjectIRDB::loadOrExit(Argv[1]); + + // The MonoIFDSTaintAnalysis requires alias information, so create it here + psr::LLVMAliasSet AS(&IRDB); + + // We use a type-hierarchy to build the call-graph (LLVMBasedICFG below) + psr::DIBasedTypeHierarchy TH(IRDB); + + // Create the ICFG + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::VTA, {"main"}, &TH, + &AS); + + // Assign each reachable llvm::Function in the call-graph a sequential ID. 
+ // This is needed for SCC computation and for the solver + auto Funs = psr::compressFunctions(ICFG.getCallGraph(), + psr::getEntryFunctions(IRDB, EntryPoints)); + + // Compute the call-graph SCCs. + auto CGSCCs = computeCGSCCs(ICFG, Funs); + + // Build a dependency-graph induced by the call-graph, collapsing each SCC to + // a single node + auto SCCC = computeCGSCCCallers(ICFG, Funs, CGSCCs); + + // For each CGSCC, compute which global variables are (transitively) used by + // any function in that SCC + auto UG = psr::computeUsedGlobals(IRDB, Funs, CGSCCs, SCCC); + + // Create the taint configuration + psr::LLVMTaintConfig TC(IRDB); + TC.print(); + llvm::outs() << "------------------------\n"; + + // More precise alias-information; technically, this is not required, but it + // helps a lot + psr::FilteredLLVMAliasIterator FAI(&AS); + + // Create the taint analysis problem: + psr::monoifds::TaintAnalysis TaintProblem(&TC, &UG, &FAI); + + // To solve the taint problem, we now create an instance of the + // MonoIFDSSolver. Here, we only pass the ICFG; the remaining helper + // information is supplied manually below + psr::monoifds::MonoIFDSSolver Solver(&TaintProblem, &ICFG); + + // Supply the solver with the previously computed helper information. If we + // don't provide this, the solver would compute them on its own once solve() + // is called. + Solver.setCGSCCs(&CGSCCs).setFunctionCompressor(&Funs); + + // Solves the taint problem. This may take some time. + Solver.solve(); + + // The monoifds::TaintAnalysis is set-up to use the analysis-printer (see + // ../04-run-ifds-analysis/otf-reporter.cpp).
By default, it prints the + // detected leaks into the given llvm::raw_ostream + TaintProblem.emitTextReport(llvm::outs()); +} diff --git a/include/phasar/DataFlow.h b/include/phasar/DataFlow.h index 5cd7522f42..d6d52320b0 100644 --- a/include/phasar/DataFlow.h +++ b/include/phasar/DataFlow.h @@ -32,6 +32,13 @@ #include "phasar/DataFlow/Mono/IntraMonoProblem.h" #include "phasar/DataFlow/Mono/Solver/InterMonoSolver.h" #include "phasar/DataFlow/Mono/Solver/IntraMonoSolver.h" +#include "phasar/DataFlow/MonoIfds/ArraySetWorkList.h" +#include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/DataFlow/MonoIfds/IterationStrategy.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSProblem.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSSolver.h" +#include "phasar/DataFlow/MonoIfds/RPOWorkList.h" #include "phasar/DataFlow/PathSensitivity/PathSensitivityConfig.h" #include "phasar/DataFlow/PathSensitivity/PathSensitivityManager.h" diff --git a/include/phasar/PhasarLLVM/ControlFlow.h b/include/phasar/PhasarLLVM/ControlFlow.h index 7019a511d7..c4bba795b9 100644 --- a/include/phasar/PhasarLLVM/ControlFlow.h +++ b/include/phasar/PhasarLLVM/ControlFlow.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_CONTROLFLOW_H #include "phasar/PhasarLLVM/ControlFlow/EntryFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/FunctionCompressor.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedBackwardICFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" diff --git a/include/phasar/PhasarLLVM/DataFlow.h b/include/phasar/PhasarLLVM/DataFlow.h index 1c4e6a4611..f3c6b63579 100644 --- a/include/phasar/PhasarLLVM/DataFlow.h +++ b/include/phasar/PhasarLLVM/DataFlow.h @@ -31,5 +31,7 @@ #include "phasar/PhasarLLVM/DataFlow/Mono/Problems/IntraMonoFullConstantPropagation.h" #include "phasar/PhasarLLVM/DataFlow/Mono/Problems/IntraMonoSolverTest.h" #include 
"phasar/PhasarLLVM/DataFlow/Mono/Problems/IntraMonoUninitVariables.h" +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" #endif // PHASAR_PHASARLLVM_DATAFLOW_H diff --git a/include/phasar/PhasarLLVM/Utils.h b/include/phasar/PhasarLLVM/Utils.h index cda1b77dc3..8a83a4d0c2 100644 --- a/include/phasar/PhasarLLVM/Utils.h +++ b/include/phasar/PhasarLLVM/Utils.h @@ -17,5 +17,6 @@ #include "phasar/PhasarLLVM/Utils/LLVMCXXShorthands.h" #include "phasar/PhasarLLVM/Utils/LLVMIRToSrc.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/PhasarLLVM/Utils/UsedGlobals.h" #endif // PHASAR_PHASARLLVM_UTILS_H From 00a0c82258124229be046ae1f0584adda9265baf Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 19:10:36 +0200 Subject: [PATCH 23/29] pre-commit + readme --- examples/how-to/06-run-monoifds-analysis/README.md | 4 ++-- include/phasar/Utils/TypeTraits.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/how-to/06-run-monoifds-analysis/README.md b/examples/how-to/06-run-monoifds-analysis/README.md index fb5fcf0232..1fb5ec1a32 100644 --- a/examples/how-to/06-run-monoifds-analysis/README.md +++ b/examples/how-to/06-run-monoifds-analysis/README.md @@ -12,7 +12,7 @@ This example program can be built using cmake. It assumes, that you have installed PhASAR on your system. If you did not install PhASAR to a default location, you can specify `-Dphasar_ROOT=your/path/to/phasar` when invoking `cmake`, replacing "your/path/to/phasar" by the actual path where you have installed PhASAR. ```bash -# Invoked from the 04-run-ifds-analysis root folder: +# Invoked from the 06-run-monoifds-analysis root folder: $ mkdir -p build && cd build $ cmake .. $ cmake --build . @@ -23,7 +23,7 @@ $ cmake --build . You can test the example program on the target programs from [llvm-hello-world/target](../../llvm-hello-world/target/). 
```bash -# Invoked from the 04-run-ifds-analysis/build folder: +# Invoked from the 06-run-monoifds-analysis/build folder: ./run-monoifds-analysis-helper-analyses ../../../llvm-hello-world/target/taint.ll ./run-monoifds-analysis-manual ../../../llvm-hello-world/target/taint.ll diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index d83bb5826a..e81d94d42b 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -111,7 +111,7 @@ template concept Foreachable = requires(T &Val) { Val.foreach ([](auto &&...Elem) {}); }; template concept ForeachableOver = - requires(T &Val) { Val.foreach ([](const Over & ...Elem) {}); }; + requires(T &Val) { Val.foreach ([](const Over &...Elem) {}); }; template concept is_pair_v = detail::is_pair::value; // NOLINT From 44f8f0deff6039f02cab7545eca0f0ce89fa02a5 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 19:18:19 +0200 Subject: [PATCH 24/29] modules --- lib/DataFlow/CMakeLists.txt | 2 ++ lib/DataFlow/MonoIfds/CMakeLists.txt | 14 ++++++++++ lib/DataFlow/MonoIfds/MonoIfds.cppm | 28 +++++++++++++++++++ .../DataFlow/MonoIfds/MonoIfds.cppm | 11 ++++++++ 4 files changed, 55 insertions(+) create mode 100644 lib/DataFlow/MonoIfds/CMakeLists.txt create mode 100644 lib/DataFlow/MonoIfds/MonoIfds.cppm create mode 100644 lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm diff --git a/lib/DataFlow/CMakeLists.txt b/lib/DataFlow/CMakeLists.txt index 8fbdcb465a..ae63640d5d 100644 --- a/lib/DataFlow/CMakeLists.txt +++ b/lib/DataFlow/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(IfdsIde) add_subdirectory(Mono) +add_subdirectory(MonoIfds) add_subdirectory(PathSensitivity) file(GLOB_RECURSE DATAFLOW_SRC *.h *.cpp) @@ -10,6 +11,7 @@ add_phasar_library(phasar_dataflow LINKS phasar_dataflow_ifdside phasar_dataflow_mono + phasar_dataflow_monoifds phasar_dataflow_pathsensitivity MODULE_FILES diff --git a/lib/DataFlow/MonoIfds/CMakeLists.txt 
b/lib/DataFlow/MonoIfds/CMakeLists.txt new file mode 100644 index 0000000000..d12edc2372 --- /dev/null +++ b/lib/DataFlow/MonoIfds/CMakeLists.txt @@ -0,0 +1,14 @@ +file(GLOB_RECURSE MONOIFDS_SRC *.h *.cpp) + +add_phasar_library(phasar_dataflow_monoifds + ${MONOIFDS_SRC} + + LINK_PRIVATE + nlohmann_json::nlohmann_json + + LLVM_LINK_COMPONENTS + Support + + MODULE_FILES + MonoIfds.cppm +) diff --git a/lib/DataFlow/MonoIfds/MonoIfds.cppm b/lib/DataFlow/MonoIfds/MonoIfds.cppm new file mode 100644 index 0000000000..af1b2eeff1 --- /dev/null +++ b/lib/DataFlow/MonoIfds/MonoIfds.cppm @@ -0,0 +1,28 @@ +module; + +#include "phasar/DataFlow/MonoIfds/ArraySetWorkList.h" +#include "phasar/DataFlow/MonoIfds/DataFlowEnvironment.h" +#include "phasar/DataFlow/MonoIfds/IterationStrategy.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSConfig.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSProblem.h" +#include "phasar/DataFlow/MonoIfds/MonoIFDSSolver.h" +#include "phasar/DataFlow/MonoIfds/RPOWorkList.h" + +export module phasar.dataflow.monoifds; + +export namespace psr { +using psr::monoifds::ArraySetDriver; +using psr::monoifds::DataFlowEnvironment; +using psr::monoifds::HasShouldBeInSummary; +using psr::monoifds::IterationStrategy; +using psr::monoifds::LocalMonoIFDSProblem; +using psr::monoifds::MonoIFDFSSolver; +using psr::monoifds::MonoIFDFSSolverBase; +using psr::monoifds::MonoIFDSAnalysisDomain; +using psr::monoifds::MonoIfdsConfig; +using psr::monoifds::MonoIFDSProblem; +using psr::monoifds::SourceFactId; +using psr::monoifds::SourceFactSet; +using psr::monoifds::to_string; +using psr::monoifds::TopoFixpointDriver; +} // namespace psr diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm b/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm new file mode 100644 index 0000000000..14af37253d --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm @@ -0,0 +1,11 @@ +module; + +#include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" +#include 
"phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" + +export module phasar.phasar_llvm.dataflow.monoifds; + +export namespace psr { +using psr::monoifds::AliasCache; +using psr::monoifds::TaintAnalysis; +} // namespace psr From 1f599b2bba2bcb6a4f3b34f251a833a3b61fecaa Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 19:28:04 +0200 Subject: [PATCH 25/29] llvm 22 --- lib/PhasarLLVM/Utils/UsedGlobals.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/PhasarLLVM/Utils/UsedGlobals.cpp b/lib/PhasarLLVM/Utils/UsedGlobals.cpp index 3217f690a4..51db347934 100644 --- a/lib/PhasarLLVM/Utils/UsedGlobals.cpp +++ b/lib/PhasarLLVM/Utils/UsedGlobals.cpp @@ -15,7 +15,7 @@ using namespace psr; static bool isEffectivelyConstant(const llvm::GlobalVariable *Glob) { auto Name = Glob->getName(); - if (Name.startswith("_ZTV") || Name.startswith("_ZTI")) { + if (Name.starts_with("_ZTV") || Name.starts_with("_ZTI")) { return true; } @@ -48,8 +48,23 @@ static bool isEffectivelyConstant(const llvm::GlobalVariable *Glob) { } auto Idx = Use.getOperandNo(); + + const bool IsNocaptureParam = +#if LLVM_VERSION_MAJOR <= 20 + Call->paramHasAttr(Idx, llvm::Attribute::NoCapture); +#else + [&] { + auto Captures = Call->getCaptureInfo(Idx); + auto CComp = + Captures.getOtherComponents() | Captures.getRetComponents(); + return !(llvm::capturesAnyProvenance(CComp) || + (llvm::capturesAddress(CComp) && + !llvm::capturesAddressIsNullOnly(CComp))); + }(); +#endif + bool IsReadonlyParam = - Call->paramHasAttr(Idx, llvm::Attribute::NoCapture) && + IsNocaptureParam && (Call->paramHasAttr(Idx, llvm::Attribute::ReadOnly) || Call->paramHasAttr(Idx, llvm::Attribute::ReadNone)); From b1260a5efc279b59bc68aece645a5de44e68b44f Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 19:51:58 +0200 Subject: [PATCH 26/29] Fix dependency of UsedGlobals to ProjectIRDB --- include/phasar/PhasarLLVM/Utils/UsedGlobals.h | 22 
+++++++++++++++++-- lib/PhasarLLVM/Utils/UsedGlobals.cpp | 14 ++++++------ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h index 00f3fcfe94..3468ae0fb0 100644 --- a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h +++ b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h @@ -9,15 +9,33 @@ * Fabian Schiebel and others *****************************************************************************/ -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/DB/ProjectIRDB.h" #include "phasar/Utils/FunctionId.h" #include "phasar/Utils/SCCGeneric.h" #include "phasar/Utils/UsedGlobalsHolder.h" +namespace llvm { +class Module; +class GlobalVariable; +class Function; + +} // namespace llvm + namespace psr { UsedGlobalsHolder computeUsedGlobals( - const LLVMProjectIRDB &IRDB, + const llvm::Module &Mod, const Compressor &Functions, const SCCHolder &SCCs, const SCCDependencyGraph &Callers); + +// Same as above overload, but uses LLVMProjectIRDB. 
We cannot directly use +// LLVMProjectIARDB here, as it would create a circular dependency between +// phasar_llvm_utils and phasar_llvm_db +UsedGlobalsHolder computeUsedGlobals( + const ProjectIRDB auto &IRDB, + const Compressor &Functions, + const SCCHolder &SCCs, + const SCCDependencyGraph &Callers) { + return computeUsedGlobals(*IRDB.getModule(), Functions, SCCs, Callers); +} } // namespace psr diff --git a/lib/PhasarLLVM/Utils/UsedGlobals.cpp b/lib/PhasarLLVM/Utils/UsedGlobals.cpp index 51db347934..53bbd32ea6 100644 --- a/lib/PhasarLLVM/Utils/UsedGlobals.cpp +++ b/lib/PhasarLLVM/Utils/UsedGlobals.cpp @@ -86,16 +86,16 @@ static bool isEffectivelyConstant(const llvm::GlobalVariable *Glob) { } static llvm::SmallDenseSet -computeEffectivelyConstGlobals(const LLVMProjectIRDB &IRDB) { +computeEffectivelyConstGlobals(const llvm::Module &Mod) { llvm::SmallDenseSet Ret; - for (const auto *Glob : IRDB.getAllGlobals()) { - if (Glob->isConstant()) { + for (const auto &Glob : Mod.globals()) { + if (Glob.isConstant()) { continue; } - if (isEffectivelyConstant(Glob)) { - Ret.insert(Glob); + if (isEffectivelyConstant(&Glob)) { + Ret.insert(&Glob); } } @@ -172,7 +172,7 @@ propagateGlobals(UsedGlobalsHolder &Ret, } UsedGlobalsHolder psr::computeUsedGlobals( - const LLVMProjectIRDB &IRDB, + const llvm::Module &Mod, const Compressor &Functions, const SCCHolder &SCCs, const SCCDependencyGraph &Callers) { @@ -180,7 +180,7 @@ UsedGlobalsHolder psr::computeUsedGlobals( Ret.InitialGlobsPerSCC.resize(SCCs.size()); Ret.GlobsPerSCC.resize(SCCs.size()); - auto EffectivelyConstGlobals = computeEffectivelyConstGlobals(IRDB); + auto EffectivelyConstGlobals = computeEffectivelyConstGlobals(Mod); initialize(Ret, Functions, SCCs, EffectivelyConstGlobals); propagateGlobals(Ret, Callers); From 3ffb334a9f1d18e645ec037dc8d0b6a26d86f7af Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 19:59:42 +0200 Subject: [PATCH 27/29] Fix MonoIFDSSolver typo --- 
include/phasar/PhasarLLVM/Utils/UsedGlobals.h | 2 +- lib/DataFlow/MonoIfds/MonoIfds.cppm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h index 3468ae0fb0..211cd245c4 100644 --- a/include/phasar/PhasarLLVM/Utils/UsedGlobals.h +++ b/include/phasar/PhasarLLVM/Utils/UsedGlobals.h @@ -29,7 +29,7 @@ UsedGlobalsHolder computeUsedGlobals( const SCCDependencyGraph &Callers); // Same as above overload, but uses LLVMProjectIRDB. We cannot directly use -// LLVMProjectIARDB here, as it would create a circular dependency between +// LLVMProjectIRDB here, as it would create a circular dependency between // phasar_llvm_utils and phasar_llvm_db UsedGlobalsHolder computeUsedGlobals( const ProjectIRDB auto &IRDB, diff --git a/lib/DataFlow/MonoIfds/MonoIfds.cppm b/lib/DataFlow/MonoIfds/MonoIfds.cppm index af1b2eeff1..f20d375f00 100644 --- a/lib/DataFlow/MonoIfds/MonoIfds.cppm +++ b/lib/DataFlow/MonoIfds/MonoIfds.cppm @@ -16,11 +16,11 @@ using psr::monoifds::DataFlowEnvironment; using psr::monoifds::HasShouldBeInSummary; using psr::monoifds::IterationStrategy; using psr::monoifds::LocalMonoIFDSProblem; -using psr::monoifds::MonoIFDFSSolver; using psr::monoifds::MonoIFDFSSolverBase; using psr::monoifds::MonoIFDSAnalysisDomain; using psr::monoifds::MonoIfdsConfig; using psr::monoifds::MonoIFDSProblem; +using psr::monoifds::MonoIFDSSolver; using psr::monoifds::SourceFactId; using psr::monoifds::SourceFactSet; using psr::monoifds::to_string; From 17754586b8bb9243dc93dfaea070907bfd6d84f1 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 May 2026 20:19:57 +0200 Subject: [PATCH 28/29] minor --- lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm b/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm index 14af37253d..1996299691 100644 --- 
a/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm +++ b/lib/PhasarLLVM/DataFlow/MonoIfds/MonoIfds.cppm @@ -3,7 +3,7 @@ module; #include "phasar/PhasarLLVM/DataFlow/MonoIfds/AliasCache.h" #include "phasar/PhasarLLVM/DataFlow/MonoIfds/Problems/MonoIFDSTaintAnalysis.h" -export module phasar.phasar_llvm.dataflow.monoifds; +export module phasar.llvm.dataflow.monoifds; export namespace psr { using psr::monoifds::AliasCache; From 5a82b1d230cb95047cdc87aae58903be4e3d3d3a Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 14 May 2026 11:13:22 +0200 Subject: [PATCH 29/29] Fix module compilation. Also use DummyFn in concept instead of lambda to prevent a clang-22 crash --- include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h | 11 +++++------ include/phasar/Utils/TypeTraits.h | 4 ++-- lib/PhasarLLVM/DataFlow/CMakeLists.txt | 1 + 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h index 96909d551e..0ce752bd34 100644 --- a/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h +++ b/include/phasar/DataFlow/MonoIfds/MonoIFDSProblem.h @@ -105,29 +105,28 @@ concept LocalMonoIFDSProblem = requires( /// fact that should be generated from zero there. /// /// Useful for taint sources. - Problem.generateFactsAtCall(Inst, Fun, - [](const typename Dom::d_t &GenFact) {}); + Problem.generateFactsAtCall(Inst, Fun, DummyFn{}); /// At a non-call-site Inst, invokes the given callback for each /// fact that should be generated from zero there. /// /// Useful for taint sources. - Problem.generateFacts(Inst, [](const typename Dom::d_t &GenFact) {}); + Problem.generateFacts(Inst, DummyFn{}); /// Invokes the given callback for each LeakFact for which the solver /// should later call onResult(Inst, LeakFact), if LeakFacts holds at /// Inst. Here, Inst is assumed to be a call-site that may call Fun. /// /// Useful for taint sinks. 
- Problem.requestResultCallbackAtCallSite( - Inst, Fun, [](const typename Dom::d_t &LeakFact) {}); + Problem.requestResultCallbackAtCallSite(Inst, Fun, + DummyFn{}); /// Invokes the given callback for each LeakFact for which the solver /// should later call onResult(Inst, LeakFact), if LeakFacts holds at /// Inst. /// /// Useful for taint sinks. - Problem.requestResultCallback(Inst, [](const typename Dom::d_t &LeakFact) {}); + Problem.requestResultCallback(Inst, DummyFn{}); /// Notifies the problem that a previously requested leak-Fact now is /// known to hold at Inst. diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index e81d94d42b..403b8cc912 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -294,8 +294,8 @@ struct IdentityFn { } }; -template struct DummyFn { - void operator()(ArgT Arg) const noexcept {} +template struct DummyFn { + void operator()(ArgsT... Arg) const noexcept {} }; /// True if T can be relocated by copying its bytes (e.g. via memcpy) without diff --git a/lib/PhasarLLVM/DataFlow/CMakeLists.txt b/lib/PhasarLLVM/DataFlow/CMakeLists.txt index 41f3877cf9..04c4ebd95a 100644 --- a/lib/PhasarLLVM/DataFlow/CMakeLists.txt +++ b/lib/PhasarLLVM/DataFlow/CMakeLists.txt @@ -7,6 +7,7 @@ add_phasar_library(phasar_llvm_dataflow LINKS phasar_llvm_ifdside phasar_llvm_mono + phasar_llvm_monoifds phasar_llvm_pathsensitivity MODULE_FILES