123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235(******************************************************************************)(* *)(* Menhir *)(* *)(* Copyright Inria. All rights reserved. This file is distributed under *)(* the terms of the GNU Library General Public License version 2, with a *)(* special exception on linking, as described in the file LICENSE. *)(* *)(******************************************************************************)openGSSmoduleMake(T:sig(**We exploit the fact that LR(1) states are integers. *)typestate=privateint(**The type of semantic values is arbitrary. *)typesemvend)=structopenTtypenode=(state,semv)GSS.node(* The set of the top nodes. *)(* A node in the most recent generation (that is, the current generation) is
known as a top node. It can also be thought of as the root of an LR stack. *)(* Two nodes in the same generation cannot correspond to the same state; that
is, they cannot have the same [state] field. Therefore, in particular, two
top nodes cannot have the same state. *)(* A mutable data structure, [tops], keeps track of the set of the top nodes.
This data structure must support the following operations:
- iterating over all top nodes (in the current generation);
- determining whether a top node with a certain state number exists;
- inserting a new top node;
- iterating over all top nodes in the previous generation;
- switching to a fresh new generation. *)(* To support iteration, we use an array of nodes. We could also use a linked
list, but a vector is more compact. Because the number of nodes is bounded
by the number of states, a dynamically resizable vector is not needed; the
vector can be stored inside a fixed-size array.
To support constant-time lookups by state number, we use an array, indexed
by states. Inside each array slot, we do not store an optional node, as one
might expect; instead we store a node. A current entry and an outdated
entry can be distinguished by testing the field [date]. This implies that
the array never needs to be reset.
To support iterating over the previous generation, we keep a second array
of nodes. To support fast switching to a new generation, we simply swap
the two arrays. *)typet={mutablecount:int;(**[count] is the number of top nodes in the current generation. This number
can grow as new nodes as registered. *)mutablenodes:nodearray;(**[nodes] is the set of top nodes in the current generation. These nodes
are stored in the array [nodes] in the interval [\[0, count)]. Together,
the fields [count] and [nodes] form a vector, that is, a variable-size
array. *)mutablenow:int;(**The current date; that is, the number of the current generation. *)table:nodearray;(**[table] is a finite map of state numbers to nodes. If the node [node]
found at offset [s] in the table is such that [node.date = now]
then it is the top node that corresponds to the state [s]. Otherwise
it is a stale node and should be ignored. *)mutableprev_count:int;(**[prev_count] is the number of top nodes in the previous generation. *)mutableprev_nodes:nodearray;(**[prev_nodes] is the set of top nodes in the previous generation. They are
stored in the array [prev_nodes] in the interval [\[0, prev_count)]. *)}letchecktops=letn=Array.lengthtops.nodesinassert(Array.lengthtops.table=n);assert(Array.lengthtops.prev_nodes=n);assert(0<=tops.count&&tops.count<=n);assert(0<=tops.prev_count&&tops.prev_count<=n);(* Check that every node in [tops.nodes] is current and is present in
the table [states]. Mark which states are represented. Check that
no state is represented twice. *)letmark=Array.makenNoneinfori=0totops.count-1doletnode=tops.nodes.(i)inassert(node.date=tops.now);lets=(node.state:>int)inassert(tops.table.(s)==node);assert(mark.(s)=None);mark.(s)<-Somenodedone;(* Check that every node in [tops.table] is present in [tops.nodes]. *)fors=0ton-1doletnode=tops.table.(s)inassert(node.date<=tops.now);ifnode.date=tops.nowthenmatchmark.(s)with|None->assertfalse|Somenode'->assert(node==node')done;(* Check that every node in [tops.prev_nodes] belongs in the previous
generation. *)fori=0totops.prev_count-1doletnode=tops.prev_nodes.(i)inassert(node.date=tops.now-1)done(* The node [sentinel] is used to fill the logically empty slots of the arrays
[nodes], [table], and [prev_nodes]. The main thing that matters about this
node is that its [date] field must contain a recognizably invalid value. We
use [-1]. *)(* Our use of [Obj.magic] to obtain a dummy state is not really dangerous,
considering that states are really integers. *)letsentinel={state=(Obj.magic0:state);date=-1;edges=MiniBabySet.empty;ddepth=0;}letcreaten=letcount=0andnodes=Array.makensentinelandnow=0andtable=Array.makensentinelandprev_count=0andprev_nodes=Array.makensentinelin{count;nodes;now;table;prev_count;prev_nodes}let[@inline]itertopsyield=fori=0totops.count-1doletnode=tops.nodes.(i)inyieldnodedonelet[@inline]for_alltopsp=(* We cannot use [Array.for_all] because we are interested in a segment
of the array, not in the whole array. *)leti=ref(tops.count-1)inwhile0<=!i&&ptops.nodes.(!i)doi:=!i-1done;!i=-1let[@inline]iter_prevtopsyield=fori=0totops.prev_count-1doletnode=tops.prev_nodes.(i)inyieldnodedoneletelements_prevtops=letnodes=ref[]initer_prevtops(funnode->nodes:=node::!nodes);!nodesletpresenttops(node:node)=lets=(node.state:>int)inassert(tops.table.(s).date<=tops.now);letpresent=(tops.table.(s).date=tops.now)inassert(ifpresentthennode==tops.table.(s)elsetrue);presentletabsenttopsnode=not(presenttopsnode)let[@inline]datetops=tops.nowlet[@inline]cardinaltops=tops.countlet[@inline]extract_singletontops=assert(cardinaltops=1);letnode=tops.nodes.(0)intops.count<-0;lets=(node.state:>int)intops.table.(s)<-sentinel;(* The previous generation should not be consulted.
Out of precaution, make it empty. *)tops.prev_count<-0;nodelet[@inline]registertops(node:node)=assert(node.date=tops.now);assert(absenttopsnode);(* Push this node into the vector. *)letcount=tops.countintops.nodes.(count)<-node;tops.count<-count+1;(* Register this node in the table. *)lets=(node.state:>int)intops.table.(s)<-nodelet[@inline]findtops(state:state)=lets=(state:>int)inletnode=tops.table.(s)inassert(node.date<=tops.now);ifnode.date=tops.nowthenSomenodeelseNonelet[@inline]bumptops=(* Bump the date. *)tops.now<-tops.now+1;(* Swap the arrays [nodes] and [prev_nodes]. *)letnodes=tops.nodesandprev_nodes=tops.prev_nodesintops.nodes<-prev_nodes;tops.prev_nodes<-nodes;(* Record the number of nodes in the previous generation. *)tops.prev_count<-tops.count;(* The current generation is fresh, therefore empty. *)tops.count<-0let[@inline]advancetopsdate=assert(cardinaltops=0);assert(tops.now<=date);(* Set the date. *)tops.now<-dateend(* Make *)