123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288(*
* $Id$
* Copyright (c) 2003, Hugues Cassé <hugues.casse@laposte.net>
*
* Library entry point..
*)(* Old history.
*
* 1.0 2.18.99 Hugues Cassé First release.
* 2.0 3.22.99 Hugues Cassé Full ANSI C and GCC attributes supported.
* Cprint improved.
* 2.1 2.18.04 Hugues Cassé A lot of improvement: improved parse
* arguments allowing preprocessing and any
* channel, converison to XML, support for
* unknown types in typedef.
*)letversion="FrontC 2.1 2.18.04 Hugues Cassé"openCabs(**
* FrontC is an OCAML library providing facilities for parsing source file
* in C language.
*
* Although it is designed for parsing ANSI C, it provides also support for
* old K&R C style and for some GCC extensions.
*
* It provides also a limited degraded mode allowing to parse file although
* all type information is not available and preprocessor directives are still
* in the source.
*
* @author Hugues Cassé <hugues.casse\@laposte.net>
*)(* !!TODO!!
Add option support:
- Support unknown types.
- Support for GC specifics (attributes, __builtin_va_list).
- Replace the input handler by a structure.
*)(**
* Parameters for building the reader handler.
*)typeparsing_arg=FROM_STDIN(** Parse the standard input. *)|FROM_CHANNELofin_channel(** Parse the given channel. *)|FROM_FILEofstring(** Parse the given file. *)|USE_CPP(** Use the C preprocessor. *)|PREPROCofstring(** Path to the preprocessor. *)|DEFofstring(** Pass this definition to CPP. *)|UNDEFofstring(** Undefine the given symbol for CPP. *)|INCLUDEofstring(** Include the given file by the CPP. *)|INCLUDE_DIRofstring(** Use the given directory for retrieving includes. *)|OPTIONofstring(** Pass the given option directl to the CPP. *)|ERRORofout_channel(** Use the given channel for outputting errors. *)|INTERACTIVEofbool(** Is this session interactive (from console). *)|GCC_SUPPORTofbool(** Support some extensions of the GCC compiler (default to true). *)|LINE_RECORDofbool(** Record line numbers in the C abstract trees (default to false). *)(**
* Result of a parsing.
*)typeparsing_result=PARSING_ERROR(** Parsing failure. Error outputted. *)|PARSING_OKofCabs.definitionlist(** Success. Return list of read definitions. *)(**
* Transform an old K&R C function definition into a new ANSI one.
* @param def Old function definition.
* @return New function definition.
* @raise UnconsistentDef Raised when an undeclared parameter is found
* in the function definition.
*)lettrans_old_fun_def(def:single_name*name_grouplist*body)=letint_type=INT(NO_SIZE,NO_SIGN)inlet((_type,store,name),par_types,body)=definlet(ident,full_type,attrs,exp)=nameinlet(rtype,par_names,vararg)=matchfull_typewithOLD_PROTOproto->proto|_->raiseUnconsistentDefinletprocess_group(rtype,store,names)=List.map(fun(name,_type,_,_)->(name,(rtype,store,_type)))namesinletpar_defs=List.flatten(List.mapprocess_grouppar_types)inletprocess_namename=let(rtype,store,ftype)=tryList.assocnamepar_defswithNot_found->(int_type,NO_STORAGE,int_type)in(rtype,store,(name,ftype,[],NOTHING))inletrecnormalize_type_type=match_typewithNO_TYPE->int_type|CONST_type->CONST(normalize_type_type)|VOLATILE_type->VOLATILE(normalize_type_type)|GNU_TYPE(attrs,_type)->GNU_TYPE(attrs,normalize_type_type)|PTR_type->PTR(normalize_type_type)|RESTRICT_PTR_type->RESTRICT_PTR(normalize_type_type)|ARRAY(_type,size)->ARRAY(normalize_type_type,size)|_->_typeinletfpars=List.mapprocess_namepar_namesinletproto=PROTO(normalize_typertype,fpars,vararg)inFUNDEF((normalize_type_type,store,(ident,proto,attrs,exp)),body)(**
* Transform all old function definition into new ones.
* @param defs Defs to transform.
* @return Definitions with all old function definitions transformed.
* @raise UnconsistentDef Raised if some old function definition does not
* define the type of a parameter.
*)letrectrans_old_fun_defsdefs=matchdefswith[]->[]|(OLDFUNDEF(dec,pars,body))::defs->(trans_old_fun_def(dec,pars,body))::(trans_old_fun_defsdefs)|def::defs->def::(trans_old_fun_defsdefs)(**
* Convert the given C file abstract repersentation into XML.
* @param file C file to convert.
* @return XML document result of conversion.
* @raise UnconsistentDef Raised if the file contains some old function
* definition whose one parameter is not defined.
*)letconvert_to_xmlfile=letsafe_file=trans_old_fun_defsfileinletchildren=List.flatten(List.mapCtoxml.convert_defsafe_file)inletelt=Cxml.new_elt"file"[]childreninCxml.new_simple_doceltletparseargs=leterror=refstderrinletinput=refstdininletcpp_cmd=ref"cpp"inletcpp_opts=ref""inletcpp_use=reffalseinletfile=ref""inletinteractive=reffalseinletgcc=reftrueinletlinerec=reffalsein(* Scan the arguments *)letrecscanargs=matchargswith[]->()|FROM_STDIN::tl->input:=stdin;scantl|(FROM_FILEpath)::tl->file:=path;scantl|(FROM_CHANNELchan)::tl->input:=chan;scantl|USE_CPP::tl->cpp_use:=true;scantl|(PREPROCcmd)::tl->cpp_cmd:=cmd;scantl|(DEFdef)::tl->cpp_opts:=!cpp_opts^" -D"^def;scantl|(UNDEFundef)::tl->cpp_opts:=!cpp_opts^" -U"^undef;scantl|(INCLUDEfile)::tl->cpp_opts:=!cpp_opts^" -i"^file;scantl|(INCLUDE_DIRdir)::tl->cpp_opts:=!cpp_opts^" -I"^dir;scantl|(OPTIONopt)::tl->cpp_opts:=!cpp_opts^" "^opt;scantl|(ERRORchan)::tl->error:=chan;scantl|(INTERACTIVEinter)::tl->interactive:=inter;scantl|(GCC_SUPPORTv)::tl->gcc:=v;scantl|(LINE_RECORDv)::tl->linerec:=v;scantlinlet_=scanargsin(* Build the input *)let(real_input,close)=ifnot!cpp_usethenif!file=""then(!input,false)else(open_in!file,true)elseletcmd=!cpp_cmd^" "^!cpp_opts^" "^!filein(Unix.open_process_incmd,true)in(* Perform the parse *)letresult=tryClexer.init{Clexer.h_interactive=!interactive;Clexer.h_in_channel=real_input;Clexer.h_line="";Clexer.h_buffer="";Clexer.h_pos=0;Clexer.h_lineno=0;Clexer.h_out_channel=!error;Clexer.h_file_name=!file;Clexer.h_gcc=!gcc;Clexer.h_linerec=!linerec;};PARSING_OK(Cparser.fileClexer.initial(Lexing.from_function(Clexer.get_bufferClexer.current_handle)))withCparser.Error->PARSING_ERROR|Cabs.BadType->Clexer.display_semantic_error"mal-formed type";PARSING_ERROR|Cabs.BadModifier->Clexer.display_semantic_error"mal-formed modifier";PARSING_ERRORin(* Cleanup *)ifclosethenclose_inreal_input;result(**
* Parse the input channel in interactive way, that is, as coming from the
* console. Error are displayed in a specific way.
* @param inp Input to read the C source from.
* @param out For outputting errors.
* @return Read C definitions.
*)letparse_interactive(inp:in_channel)(out:out_channel):parsing_result=parse[FROM_CHANNELinp;ERRORout;INTERACTIVEtrue](**
* Parse the C source from the console. It exactly equals to
* "parse_interactive stdin stderr".
* @return Read C definitions.
*)letparse_console_:parsing_result=parse_interactivestdinstderr(**
* Parse the C source from a non-interactive channel. It may be useful when
* the source come from a piped channel from the C preprocessor, for example.*
* @param input Input channel to read the source from.
* @param out Output channel to display errors.
* @return Read C definitions.
*)letparse_channel(input:in_channel)(out:out_channel):parsing_result=parse[FROM_CHANNELinput;ERRORout](**
* Parse a C source passed as a file path.
* @param file_name Path of the file to read.
* @param out Channel used for displaying errors.
* @return Read C definitions.
*
* NOTE: an error during the read of the file returned as a parse failure.
*)letparse_file(file_name:string)(out:out_channel):parsing_result=tryparse[FROM_FILEfile_name;ERRORout]with(Sys_errormsg)->output_stringout("Error while opening "^file_name^": "^msg^"\n");PARSING_ERROR