123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244(***************************************************************************)(* Morsmall *)(* A concise AST for POSIX shell *)(* *)(* Copyright (C) 2017,2018,2019 Yann Régis-Gianas, Ralf Treinen, *)(* Nicolas Jeannerod *)(* *)(* This program is free software: you can redistribute it and/or modify *)(* it under the terms of the GNU General Public License as published by *)(* the Free Software Foundation, either version 3 of the License, or *)(* (at your option) any later version. *)(* *)(* This program is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU General Public License for more details. *)(* *)(* You should have received a copy of the GNU General Public License *)(* along with this program. If not, see <http://www.gnu.org/licenses/>. *)(***************************************************************************)(** abstract syntax of test expressions *)typeexpression=|Andofexpression*expression|Orofexpression*expression|Notofexpression|Binaryofstring*string*string(* (op,arg_left,arg_right) *)|Unaryofstring*string(* (op,arg) *)|Singleofstring(* arg *)exceptionParse_errortypetoken=|UnOpofstring(* unary operators -e, -f, etc. *)|BinOpofstring(* binary operators -eq, =, etc. *)|AndOp(* -a *)|OrOp(* -o *)|NotOp(* ! *)|ParL(* ( *)|ParR(* ) *)|BracketR(* ] *)|Stringofstring(* all the rest *)|EOFletto_tokens=matchswith(* file existence and type *)|"-e"|"-d"|"-f"|"-b"|"-c"|"-h"|"-L"|"-p"|"-S"->UnOps(* file attributes *)|"-g"|"-u"|"-s"|"-r"|"-w"|"-x"->UnOps(* GNU extension on files *)|"-G"|"-O"|"-k"->UnOps(* GNU extension on files *)|"-nt"|"-ot"|"-ef"->BinOps(* unary operators on strings *)|"-n"|"-z"->UnOps(* binary operators on strings *)|"="|"!="->BinOps(* binary operators on integers *)|"-eq"|"-ne"|"-gt"|"-ge"|"-lt"|"-le"->BinOps(* unary operator on file descriptor *)|"-t"->UnOps|"-a"->AndOp|"-o"->OrOp|"("->ParL|")"->ParR|"]"->BracketR|"!"->NotOp|_->Stringsletparse?(bracket=false)wl=lettokenbuf=wl|>List.mapMorbig.remove_quotes|>List.mapto_token|>refinletlookup()=match!tokenbufwith|h::_->h|[]->EOFinletpop()=match!tokenbufwith|_::r->tokenbuf:=r|[]->assertfalseinletrecparse_S()=letexp=parse_S'()inifbrackettheniflookup()=BracketRthenpop()elseraiseParse_error;iflookup()=EOFthenexpelseraiseParse_errorandparse_S'()=matchlookup()with|EOF|BracketR->None|_->Some(parse_disj())andparse_disj()=lethead=parse_conj()inmatchparse_disj'()with|None->head|Somerest->Or(head,rest)andparse_disj'()=matchlookup()with|EOF|BracketR|ParR->None|OrOp->pop();Some(parse_disj())|_->raiseParse_errorandparse_conj()=lethead=parse_literal()inmatchparse_conj'()with|None->head|Somerest->And(head,rest)andparse_conj'()=matchlookup()with|OrOp|EOF|BracketR|ParR->None|AndOp->pop();Some(parse_conj())|_->raiseParse_errorandparse_literal()=matchlookup()with|NotOp->pop();Not(parse_atom())|UnOp_|ParL|String_->parse_atom()|_->raiseParse_errorandparse_atom()=matchlookup()with|UnOpop->pop();(matchlookup()with|Strings->pop();Unary(op,s)|_->raiseParse_error)|ParL->pop();letexp=parse_disj()in(matchlookup()with|ParR->pop();exp|_->raiseParse_error)|Strings->pop();(matchparse_atom'()with|None->Singles|Some(binop,rightarg)->Binary(binop,s,rightarg))|_->raiseParse_errorandparse_atom'()=matchlookup()with|AndOp|OrOp|EOF|BracketR->None|BinOpbinop->pop();(matchlookup()with|Stringrightarg|UnOprightarg|BinOprightarg->pop();Some(binop,rightarg)|_->raiseParse_error)|_->raiseParse_errorinparse_S()(*
grammar of test expressions:
<S> -> EOF | <disj> EOF
<disj> -> <conj> | <conj> -o <disj>
<conj> -> <literal> | <literal> -a <conj>
<literal> -> <atom> | ! <atom>
<atom> -> string | unop string | string binop string | ( <disj> )
grammar in LL(1):
<S> -> <S'> EOF
<S'> -> EPSILON | <disj>
<disj> -> <conj> <disj'>
<disj'> -> EPSILON | -o <disj>
<conj> -> <literal> <conj'>
<conj'> -> EPSILON | -a <conj>
<literal> -> <atom> | ! <atom>
<atom> -> string <atom'> | unop string | ( <disj> )
<atom'> -> EPSILON | binop string
annulating non-terminals: { <disj'>, <conj'>, <atom'> }
nonterminal | Fi_1
------------+--------------------
<S> | string, unop, (, !
<disj> | string, unop, (, !
<disj'> | -o
<conj> | string, unop, (, !
<conj'> | -a
<literal> | string, unop, (, !
<atom> | string, unop, (
<atom'> | binop
right side | FIRST_1
-------------------+---------------------
<disj> EOF | string, unop, (, !
<conj> <disj'> | string, unop, (, !
-o <disj> | -o
<literal> <conj'> | string, unop, (, !
-a <conj> | -a
<atom> | string, unop, (
! <atom> | !
string <atom'> | string
unop string | unop
( <disj> ) | (
binop string | binop
nonterminal | FOLLOW_1
------------+--------------------
<S> | \emptyset
<disj> | EOF, )
<disj'> | EOF, )
<conj> | -o, EOF, )
<conj'> | -o, EOF, )
<literal> | -a, -o, EOF, )
<atom> | -a, -o, EOF, )
<atom'> | -a, -o, EOF, )
Hence we have the following requirements for being LL(1):
nonterminal | must be mutually disjoint
------------+--------------------------
<S> | ---
<disj> | ---
<disj'> | EOF, ), -o
<conj> | ---
<conj'> | -o, EOF, ), -a
<literal> | string, unop, (, !
<atom> | string, unop, (
<atom'> | -a, -o, EOF, ), binop
*)