123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707(**************************************************************************)(* This file is part of the Codex semantics library. *)(* *)(* Copyright (C) 2013-2025 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file LICENSE). *)(* *)(**************************************************************************)openUnitsexceptionBottom(** The {!Bottom} exception can be raised by any transfer function that realises
the state is empty. This should mainly be {!WithAssume.assume}, but it can
also forward transfer function (for instance, division by zero). *)exceptionMemory_Empty(** Raised e.g. when storing to an invalid location. *)moduleLog=Tracelog.Make(structletcategory="Domains.Sig"end);;moduleQuadrivalent=Lattices.Quadrivalent(** A Context represent a set of paths leading to the current state (it
corresponds to a path condition in symbolic execution)
Note: we use a module for Context, instead of a type, so that it
can be used as a functor argument and replace the Arity.
TODO: Rename Context to AbsState: the context is now a representation
of the state at a program point. *)moduletypeContext=sigtypetvallevel:t->int(** Create an independent copy of the context. *)valcopy:t->t(** [assign ctx1 ctx2] makes [ctx1] a copy of [ctx2]. *)valassign:t->t->unit(** Types for serialization. *)(** The type of the tuples of argument to nondet (i.e., arguments of a phi function). *)type'ain_tupletypeempty_tuplevalempty_tuple:unit->empty_tuplein_tuple(** An accumulator is a set of arguments to nondet, and an accumulated inclusion check. *)type'ain_acc=bool*'ain_tuple(** The type of the result of the phi function. *)type'aout_tuple(** We use a GADT because ['some] is existentially quantified: we don't want
the type of {!in_tuple} to appear in serialization function, as, for instance,
what we put in in in_tuple can depend on some condition.
The boolean expresses whether the {b second} operand of the serialization was included
in the {b first} one. *)type('a,'b)result=Result:bool*'somein_tuple*(t->'someout_tuple->'a*'bout_tuple)->('a,'b)resultend(* TODO: This should return the context, and have the form of a state
monad (i.e. state -> 'a * state).
We can do it gradually, e.g. changing the booleans or the binary
first.
One of the benefits of this is that it could enable having transfer
functions that create alarms (and reduce their state using the
alarm).
Note: having a state and option monad (state -> '(a * state) option) works
but it makes the transfer functions heavier to write. Maybe we should
raise an exception instead. *)moduleContext_Arity_Forward(Context:Context)=structtype'rar0=Context.t->'rtype('a,'r)ar1=Context.t->'a->'rtype('a,'b,'r)ar2=Context.t->'a->'b->'rtype('a,'b,'c,'r)ar3=Context.t->'a->'b->'c->'rtype('a,'r)variadic=Context.t->'alist->'rend(* Monadic arity is what we should be aiming to get. *)moduleMonadic_Context(Context:Context)=struct(* This is used to combine complex expressions. *)let(let*)mf=functx->let(v,ctx)=mctxinfvctx;;type'rar0=Context.t->('r*Context.t)type('a,'r)ar1='a->Context.t->('r*Context.t)type('a,'b,'r)ar2='a->'b->Context.t->('r*Context.t)type('a,'b,'c,'r)ar3='a->'b->'c->Context.t->('r*Context.t)type('a,'r)variadic='alist->Context.t->('r*Context.t)end(* General design notes:
- Functors should state its minimal needs and provides the maximum
features. But optional features of a functor, depending on an
optional feature of an argument, should be done using "additional
features functor". This sometimes make their use impractical, as is
the multiplication of possible configurations.
For this reason, we tend to have the "base case" require more
features than strictly necessary, when these extra features can be
easily implemented using an approximate version (like the is_empty
functions).
TODO: This is the case for backward propagation functions, so they
could always be required.
- The types are always lifted, so that it is easy to add type
equality/substitution in module constraints. Modules are only used
for things that are not supposed to be changed, because it is
difficult to "insert" additional values inside a submodule. That is
why we do not use recursive submodules.
- There is a hierarchy of types: boolean is standalone, binary
depends on boolean, and memory depends on both.
- When including modules with additional components (e.g. size_int
for Ref_Addr), always put the component with largest fields at the
beginning, else destructive substitution of modules does not work.
- The main purpose of this file is to provide a "standard" way to
name the operations on domains. *)(* Note: for simplicity, we now just require everybody to provide "BASE";
we have helper "Assert_False" modules to fill the holes when needed. *)(****************************************************************)(** {1 Forward transfer functions} *)moduletypeWith_Boolean_Forward=sigtypebooleanmoduleContext:ContextmoduleBoolean_Forward:Operator.BOOLEAN_FORWARDwithmoduleArity:=Context_Arity_Forward(Context)andtypeboolean:=booleanendmoduletypeWith_Integer_Forward=sigtypebooleantypeintegermoduleContext:ContextmoduleInteger_Forward:Operator.INTEGER_FORWARDwithmoduleArity:=Context_Arity_Forward(Context)andtypeboolean:=booleanandtypeinteger:=integerendmoduletypeWith_Binary_Forward=sigtypebooleantypebinarymoduleContext:ContextmoduleBinary_Forward:Operator.BINARY_FORWARDwithmoduleArity:=Context_Arity_Forward(Context)andtypeboolean:=booleanandtypebinary:=binaryendmoduletypeWith_Enum_Forward=sigtypebooleantypeenummoduleContext:ContextmoduleEnum_Forward:Operator.ENUM_FORWARDwithmoduleArity:=Context_Arity_Forward(Context)andtypeboolean:=booleanandtypeenum:=enumend(****************************************************************)(** {1 Queries} *)(** Queries allow to ask the domain an overapproximation of the set of
concrete objects to which a dimension concretizes. This set of
object must be finitely represented, but the choice of this
representation is left to the domain. It is required that these
objects can be converted to some standard representations.
In addition, we require this set of object to be represented by a
lattice, so that it is possible to test inclusion and perform
approximation of union on these set of objects. *)(** Note: since {!Lattices.Quadrivalent} exactly represents the powerset of
[{true,false}], there is no point in using another type. *)moduletypeBoolean_Lattice=Lattices.Sig.BOOLEAN_LATTICEwithtypet=Lattices.Quadrivalent.tmoduletypeInteger_Lattice=Lattices.Sig.INTEGER_LATTICEmoduletypeEnum_Lattice=Lattices.Sig.ENUM_LATTICEmoduletypeBinary_Lattice=Lattices.Sig.BITVECTOR_LATTICEmoduletypeInteger_Query=sigtypeabstract_state(* Context. *)moduleInteger_Lattice:Integer_Latticetypeintegervalquery:abstract_state->integer->Integer_Lattice.tend(* TODO: Should be structured like Integer_Queries; this is more modular. *)moduletypeWITH_QUERIES=sigmoduleContext:ContexttypebinarytypeenummoduleQuery:sigmoduleBinary_Lattice:Binary_Latticevalbinary:size:In_bits.t->Context.t->binary->Binary_Lattice.tmoduleEnum_Lattice:Enum_Latticevalenum:Context.t->enum->Enum_Lattice.tendendmoduletypeWith_Types=sigmoduleContext:Contexttypebinary(** Returns an unknown value with a given type. *)valbinary_unknown_typed:size:In_bits.t->Context.t->Types.TypedC.typ->binaryend(****************************************************************)(** {1 Other extensions} *)moduletypeWith_Partitionning=sigtype'adecisiontypeboolean(** The function goes from the strategy to the partitionning map;
this requires the polymorphic argument. *)valboolean_split:('a->'a->'adecision)->boolean->booleanend(****************************************************************)(** {2 Context} *)moduletypeWith_Context=sigmoduleContext:Context(** Opens a new context, corresponding to the initial scope. *)valroot_context:unit->Context.t(** Dumps what is known about the current context: useful for debugging. *)valcontext_pretty:Format.formatter->Context.t->unitend(****************************************************************)(** {2 Guards} *)moduletypeWith_Assume=sigtypebooleanmoduleContext:Context(** Corresponds to the creation of a new basic block, accessible only
if the condition is met.
@raises Bottom *)valassume:Context.t->boolean->Context.toptionend(****************************************************************)(** {2 Fixpoint iteration}
Fixpoint iteration, and base for all abstract domains. *)moduletypeWith_Nondet=sigmoduleContext:Context(** This joins two basic blocks and returns a new basic block. The
{!Context.in_tuple} and {!Context.out_tuple} corresponds to the phi operations in SSA. *)valtyped_nondet2:Context.t->Context.t->'aContext.in_tuple->Context.t*'aContext.out_tuple(** Additionally, one may compute a non-deterministic choice between
two values in the same basic block
It can be seen as equivalent as calling {!typed_nondet2} by passing the same context twice,
which would return the same context. *)valnondet_same_context:Context.t->'aContext.in_tuple->'aContext.out_tuple(* Note: this function imperatively modifies the context.
It should return a new context; this should be done when
load and store will return a new context. *)end(** An integer uniquely identifying a widening point.
See "Compiling with Abstract Interpretation", Lesbre&Lemerre, PLDI 2024. *)moduleWidening_Id:sigtypet=privateintvalfresh:unit->tend=structtypet=intletcount=ref0letfresh()=letv=incrcount;!countinvend(* Note: all domains could have thee same interface; but some would
have assert false and unit for the types they do not handle. We
could even list the types that are handled by a domain, and check
that everything is correct on functor instantiation. E.g. a functor
translating memory to binary know how to handle binary and memory,
and requires binary; it is able to pass the other types through.
Other things could be checked, e.g. whether the domain is
relational or not (possible optimizations), or if bottom is
handled/necessary (evaluating domains do not require elements below
to have a bottom.) *)moduletypeWith_Fixpoint_Computation=sigmoduleContext:Context(** Opening a new context can also be seen as opening a new scope:
new variables can be introduced, but variables of the parent
scope can still be seen. *)valmu_context_open:Context.t->Context.t(** Fixpoint step is a combination of inclusion checking +
widening.
@param init is the context leading to the loop entry,
@param arg is the context at the loop entry (obtained by {!mu_context_open} or by the last fixpoint_step operation)
@param body is the context at the end of the loop
Also takes a boolean and a tuple of values which is the result of the
evaluation of the body (the end of the loop).
Internally, it stores the arguments of the mu.
@returns a boolean which says if the fixpoint is reached, and a
function. If the fixpoint is reached, we can "close" the mu, and
the function returns a tuple corresponding to the mu. We can
always "restart" the mu, in which case the function returns a new
arg. *)(* MAYBE: return other informations with the tuple, for instance if
we detect that the function is finitely unrollable. *)valtyped_fixpoint_step:iteration:int->init:Context.t->arg:Context.t->body:Context.t->(bool*'aContext.in_tuple)->bool*(close:bool->'aContext.out_tuple*Context.t)(** [widened_fixpoint_step ~previous ~next (bool,in_tuple)] where:
- [widening_id] is a unique representation of the widening point;
- [previous] is the previous domain state;
- [next] is the next domain state obtained after execution of the function body;
- [bool] is false if we know that the fixpoint is not reached yet, and true otherwise;
- [in_tuple] is the argument of the SSA phi function;
returns a triple [(context,bool,out_tuple)] where:
- [context] is the new domain state;
- [bool] is true if the fixpoint is reached, and false if not reached or we don't know;
- [out_tuple] is the result of the SSA phi function. *)valwidened_fixpoint_step:widening_id:Widening_Id.t->previous:Context.t->next:Context.t->(bool*'aContext.in_tuple)->(Context.t*bool*'aContext.out_tuple)endmoduleFresh_id:sigtypet=privateintvalfresh:string->t(* Arg: the name of the module; may be useful later. *)end=structtypet=intletcount=ref0letfreshname=letv=incrcount;!countinLog.debug(funp->p"Registering domain %s with id %d"namev);vend(** Identifying domains. *)moduletypeWith_Id=sigvalunique_id:unit->Fresh_id.tvalname:unit->stringend(** {1 Optional types that can be used in the domain} *)(****************************************************************)(** BASE module types describing operations on one or several types of terms. *)(** Notes on the base operations:
- Pretty is required everywhere, and used for debugging.
- Equality refers to equality of the concretization. It can be
approximate, i.e. it is ok to return false when we cannot detect
that elements are equal; however when used as keys of
datastructures, equality should probably at least return true for
elements that are (==).
- TODO: Do compare and hash have to respect equality? Map and Set
do not need "equal", but Hashtbl does. So it seems that at least
hash should respect equality, i.e. equal elements should have the
same hash; which is not obvious when structurally different
elements are detected as equal (e.g. different representations of
empty). Or maybe it does not need, but in this case it is
undefined whether different abstract values with same
concretization represent different binding in the table (if by
chance the hash is the same, they will share a binding; else they
may have different bindings).
- compare and hash do not need to be implemented if the
datastructures are not used.
*)(** We document the boolean cases, as integer are pretty similar. *)moduletypeWith_Boolean=sigmoduleContext:ContexttypebooleanmoduleBoolean:Datatype_sig.Swithtypet=booleanvalboolean_pretty:Context.t->Format.formatter->boolean->unitvalserialize_boolean:Context.t->boolean->Context.t->boolean->'aContext.in_acc->(boolean,'a)Context.result(** Empty denotes that the concretization has no value (or it is
the concrete value representing the absence of value). Note
that this does not necessarily imply that some error occured;
for instance the offset of an uninitialized pointer can be
represented with empty. Emptyness testing is a simple way of
communicating between domains. *)valboolean_empty:Context.t->boolean(* TODO: get rid of these levels, the context suffices now that it is flow-sensitive. *)valboolean_unknown:Context.t->booleanmoduleBoolean_Forward:Operator.BOOLEAN_FORWARDwithmoduleArity:=Context_Arity_Forward(Context)andtypeboolean:=booleanvalquery_boolean:Context.t->boolean->Lattices.Quadrivalent.tendmoduletypeWith_Integer=sigmoduleContext:ContexttypeintegertypebooleanmoduleInteger:Datatype_sig.Swithtypet=integer(* TODO: An "integer_value", that returns a range (and congruence
information) of the value. And perhaps another representation
that uses multi-intervals. *)(** Can return true if provably empty; false is always safe. *)valinteger_is_empty:Context.t->integer->boolvalinteger_pretty:Context.t->Format.formatter->integer->unitvalserialize_integer:widens:bool->Context.t->integer->Context.t->integer->'aContext.in_acc->(integer,'a)Context.resultvalinteger_empty:Context.t->integervalinteger_unknown:Context.t->integermoduleInteger_Forward:Operator.INTEGER_FORWARDwithmoduleArity:=Context_Arity_Forward(Context)andtypeboolean:=booleanandtypeinteger:=integermoduleInteger_Query:Integer_Querywithtypeabstract_state:=Context.tandtypeinteger:=integerendmoduletypeWith_Binary=sigmoduleContext:ContexttypebinarytypebooleanmoduleBinary:Datatype_sig.Swithtypet=binaryvalbinary_pretty:size:In_bits.t->Context.t->Format.formatter->binary->unitvalserialize_binary:widens:bool->size:In_bits.t->Context.t->binary->Context.t->binary->'aContext.in_acc->(binary,'a)Context.resultvalbinary_empty:size:In_bits.t->Context.t->binaryvalbinary_unknown:size:In_bits.t->Context.t->binaryincludeWith_Binary_ForwardwithmoduleContext:=Contextandtypebinary:=binaryandtypeboolean:=booleanendmoduletypeWith_Enum=sigmoduleContext:ContexttypebooleantypeenummoduleEnum:Datatype_sig.Swithtypet=enumvalenum_pretty:Context.t->Format.formatter->enum->unitvalserialize_enum:Context.t->enum->Context.t->enum->'aContext.in_acc->(enum,'a)Context.resultvalenum_empty:Context.t->enumvalenum_unknown:enumsize:int->Context.t->enumincludeWith_Enum_ForwardwithmoduleContext:=Contextandtypeenum:=enumandtypeboolean:=booleanend(** {1 Complete instantiations} *)(** This signature is useful when we don't have any new flow-sensitive state and just
need all the things on the top of the stack to stay the same. *)moduletypeMinimal_No_Boolean=sigincludeWith_IdincludeWith_Contexttypeboolean(** Guards *)includeWith_AssumewithmoduleContext:=Contextandtypeboolean:=boolean(** Joining variables together. *)includeWith_NondetwithmoduleContext:=Context(** Fixpoint computation. *)includeWith_Fixpoint_ComputationwithmoduleContext:=Contextend(** This signature does not have pre-built values, except booleans. *)moduletypeMinimal=sigincludeMinimal_No_Boolean(** The boolean domain should be present everywhere, as we need it or guards. *)includeWith_BooleanwithmoduleContext:=Contextandtypeboolean:=booleanend(* Note: We could have store a "current_context" context inside the
domain, which would avoids the need to pass context,mu_context
etc. everytime. *)(* Note: we could now organize this differently, using different types. *)moduletypeBASE=sigincludeMinimaltypebinarytypeenumincludeWITH_QUERIESwithmoduleContext:=Contextandtypebinary:=binaryandtypeenum:=enumincludeWith_TypeswithmoduleContext:=Contextandtypebinary:=binaryincludeWith_BinarywithmoduleContext:=Contextandtypebinary:=binaryandtypeboolean:=booleanincludeWith_EnumwithmoduleContext:=Contextandtypeenum:=enumandtypeboolean:=boolean(** Set operations. Note that we do not distinguish binary from binary sets.
Note that union reuses the serialize machinery. *)valunion:Operator.Condition.t->Context.t->'aContext.in_tuple->'aContext.out_tuple(** Check if an assertion is satisfiable (i.e. a trace exists that makes it true). *)valsatisfiable:Context.t->boolean->Smtbackend.Smtlib_sig.satend(** These are functions that can be implemented using the base signatures.
See Domain_extend for instantiation. *)moduletypeExt=sigmoduleContext:Contexttypeboolean(** Because the transfer functions imperatively change the context,
they cannot use assume, that returns a new context. Temporarily,
we provide this instead (it should be applied only to fresh
symbolic variables and not modify the set of valuations of the other symbolic variables.
In particular, the condition must never make the context bottom).
The good long-term solution would be to make every transfer
function return a new Context.t option, viewing the context as
some state monad. *)valimperative_assume:Context.t->boolean->unit(* TODO: may_be_true using assign; join_value; the ability to make temporary copies when we make queries; etc.
with_copy (fun ctx -> ... returns true if we should copy the ctx, and the value that we want)
*)endmoduletypeBASE_WITH_INTEGER=sigincludeBASEincludeWith_IntegerwithmoduleContext:=Contextandtypeboolean:=booleanend(****************************************************************)(** {1 Context conversions} *)(** Context conversion procedures: pass through the values by just
changing the context. *)moduletypeConvert_Contexts=sigmoduleFrom:ContextmoduleTo:Contextvalconvert:From.t->To.tendmoduleMake_Convert(C:Convert_Contexts)=struct(* Note: context conversion goes in the opposite direction than
transfer function conversion.*)moduleFrom_Arity=Context_Arity_Forward(C.To)moduleTo_Arity=Context_Arity_Forward(C.From)letar0fctx=f(C.convertctx)letar1=ar0letar2=ar0letar3=ar0letvariadic=ar0endmoduleConvert_Boolean_Forward(C:Convert_Contexts)(D:With_Boolean_ForwardwithmoduleContext=C.To)=structmoduleC=Make_Convert(C)moduleF=structincludeD;;includeD.Boolean_ForwardendincludeOperator.Conversions.Convert_Boolean_Forward(C)(F)endmoduleConvert_Integer_Forward(C:Convert_Contexts)(D:With_Integer_ForwardwithmoduleContext=C.To)=structmoduleC=Make_Convert(C)moduleF=structincludeD;;includeD.Integer_ForwardendincludeOperator.Conversions.Convert_Integer_Forward(C)(F)endmoduleConvert_Binary_Forward(C:Convert_Contexts)(D:With_Binary_ForwardwithmoduleContext=C.To)=structmoduleC=Make_Convert(C)moduleF=structincludeD;;includeD.Binary_ForwardendincludeOperator.Conversions.Convert_Binary_Forward(C)(F)endmoduleConvert_Enum_Forward(C:Convert_Contexts)(D:With_Enum_ForwardwithmoduleContext=C.To)=structmoduleC=Make_Convert(C)moduleF=structincludeD;;includeD.Enum_ForwardendincludeOperator.Conversions.Convert_Enum_Forward(C)(F)end(** This will help to the transition in a top-down manner, starting
from the translation and top-level domain to the lower-level
domain.
The idea is to support both interfaces, and use conversion to
simplify the support for both. I can have a signature for both
domains, and an "AddMonadic" functor to support both domains. *)moduleConvert_to_monadic(D:BASE)=structmoduleConversion=structmoduleFrom_Arity=Context_Arity_Forward(D.Context);;moduleTo_Arity=Monadic_Context(D.Context);;letar0f=(functx->fctx,ctx)letar1f=(funactx->fctxa,ctx)letar2f=(funabctx->fctxab,ctx)letar3f=(funabcctx->fctxabc,ctx)endmoduleTypes=structtypeboolean=D.booleantypebinary=D.binarytypeenum=D.enumendmoduleBoolean_Forward=Operator.Conversions.Convert_Boolean_Forward(Conversion)(structincludeTypesincludeD.Boolean_Forwardend)moduleBinary_Forward=Operator.Conversions.Convert_Binary_Forward(Conversion)(structincludeTypesincludeD.Binary_Forwardend)moduleEnum_Forward=Operator.Conversions.Convert_Enum_Forward(Conversion)(structincludeTypesincludeD.Enum_Forwardend)end