123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353openCore_kernelopenRresulttypecolumn=|Intsofintarray|Int_optsofintoptionarray|Floatsoffloatarray|Float_optsoffloatoptionarray|Stringsofstringarray|String_optsofstringoptionarray[@@derivingshow]typet={nrows:int;ncols:int;cols:(string*column)list;}letcolumn_length=function|Intsxs->Array.lengthxs|Int_optsxs->Array.lengthxs|Floatsxs->Array.lengthxs|Float_optsxs->Array.lengthxs|Stringsxs->Array.lengthxs|String_optsxs->Array.lengthxsletmake=function|[]->Error(`Msg"empty dataframe")|(_,first_col)::tascols->letnrows=column_lengthfirst_colinifnot(List.for_allt~f:(fun(_,col)->column_lengthcol=nrows))thenError(`Msg"columns with different lengths")elsematchList.(find_a_dup~compare:String.compare(mapcols~f:fst))with|Somel->Rresult.R.error_msgf"duplicate label %S"l|None->letncols=List.lengthcolsinOk{nrows;ncols;cols}letcolumnsdf=df.colsletnrowst=t.nrowsletncolst=t.ncolsletget_col_by_namet=List.Assoc.findt.cols~equal:String.equalletget_colti=List.ntht.colsi|>Option.map~f:sndletparse_headerh=letlabels=String.split~on:'\t'hinlabels,List.lengthlabelsletfold_linesxs~init~f=letrecloopiacc=function|[]->Okacc|x::xs->matchfiaccxwith|Okr->loop(i+1)rxs|Error_ase->einloop0initxsletoptionallyf=function|"NA"->None|s->Some(fs)letrevconvfcol=tryArray.of_list_rev_mapcol~f|>Result.returnwith_->Error`Conversion_failureletrevconv_optf=revconv(optionallyf)lettry_withfx~ok~error=matchfxwith|Oky->oky|Errore->erroreletintsx=Intsxletint_optsx=Int_optsxletstringsx=Stringsxletstring_optsx=String_optsxletfloatsx=Floatsxletfloat_optsx=Float_optsxletguess_rev_convert_colcol=ifList.memcol"NA"~equal:String.equalthentry_with(revconv_optInt.of_string)col~ok:int_opts~error:(fun_->try_with(revconv_optFloat.of_string)col~ok:float_opts~error:(fun_->List.mapcol~f:Option.some|>Array.of_list|>string_opts))elsetry_with(revconvInt.of_string)col~ok:ints~error:(fun_->try_with(revconvFloat.of_string)col~ok:floats~error:(fun_->Array.of_list_revcol|>strings))letparse_lines~file_has_headerncolslinesf=letopenResult.Monad_infixinletorigin=1+iffile_has_headerthen1else0inletinit=0,List.initncols~f:(Fn.const[])infold_lineslines~init~f:(funi(nr,acc)l->letfields=String.splitl~on:'\t'inmatchList.map2fieldsacc~f:List.conswith|Okr->Ok(nr+1,r)|Unequal_lengths->Rresult.R.error_msgf"Line %d doesn't have the expected %d fields"(i+origin)ncols)>>|fun(nrows,cols)->f~nrows~list_of_reverted_columns:colstypeparse_result=(int*columnlist,[`Msgofstring])result[@@derivingshow]let%expect_test"Dataframe.parse_line ex1"=letguess_rev_convert_cols~nrows~list_of_reverted_columns:cols=nrows,List.mapcols~f:guess_rev_convert_colinletgot=parse_lines~file_has_header:false3["a\t1.2\tNA";"a\t1.2\t2";"c\t-1.2\tNA";]guess_rev_convert_colsinprint_endline(show_parse_resultgot);[%expect{|
(Ok (3,
[(Dataframe.Strings [|"a"; "a"; "c"|]);
(Dataframe.Floats [|1.2; 1.2; -1.2|]);
(Dataframe.Int_opts [|None; (Some 2); None|])])) |}]letcheck_header~colnamesheader=matchList.for_all2colnamesheader~f:String.equalwith|Oktrue->Ok()|Okfalse->Error(`Msg"header is different from expected value")|Unequal_lengths->Error(`Msg"incorrect number of columns")letfrom_file_gen?(header=`Read_in_file)pathf=letopenLet_syntax.Resultinletlines=In_channel.read_linespathinlet*labels,ncols,data_lines=matchheader,lineswith|(`Read_in_file|`Expect_),[]->Error(`Msg"empty file but expected header")|`Read_in_file,header::lines->letlabels,ncols=parse_headerheaderinOk(labels,ncols,lines)|`Expectcolnames,header::data_lines->letlabels,ncols=parse_headerheaderinlet+()=check_header~colnameslabelsinlabels,ncols,data_lines|`Usecolnames,data_lines->Ok(colnames,List.lengthcolnames,data_lines)|`None,(h::_asdata_lines)->letcolnames=String.splith~on:'\t'|>List.mapi~f:(funi_->sprintf"C%d"i)inOk(colnames,List.lengthcolnames,data_lines)|`None,[]->Ok([],0,[])inletfile_has_header=matchheaderwith|`Use_|`None->false|`Read_in_file|`Expect_->trueinparse_lines~file_has_headerncolsdata_lines(fun~nrows~list_of_reverted_columns->f~nrows~ncols~labels~list_of_reverted_columns)letfrom_file?headerpath=from_file_gen?headerpath(fun~nrows~ncols~labels~list_of_reverted_columns->letcols=List.map2_exnlabelslist_of_reverted_columns~f:(funlabelcol->label,guess_rev_convert_colcol)in{nrows;ncols;cols})moduleParser=structtypeerror=[|`Conversion_failure|`Msgofstring|`Not_enough_columns|`Too_many_columns|`Unexpected_labelofstring*string][@@derivingshow]type'at=|Return:'a->'at|Bind:'at*('a->'bt)->'bt|Column:string*(stringlist->('aarray,error)result)->'aarraytletreturnx=Returnxletbindx~f=Bind(x,f)letintslabel=Column(label,revconvInt.of_string)letfloatslabel=Column(label,revconvFloat.of_string)letstringslabel=Column(label,revconvFn.id)letint_optslabel=Column(label,revconv_optInt.of_string)letfloat_optslabel=Column(label,revconv_optFloat.of_string)letstring_optslabel=Column(label,revconv_optFn.id)let(let*)xf=Bind(x,f)let(let+)xf=Bind(x,funx->return(fx))letrecrun:typea.at->labels:stringlist->list_of_reverted_columns:stringlistlist->(a,[>error])result*stringlist*stringlistlist=funp~labels~list_of_reverted_columns->matchp,labels,list_of_reverted_columnswith|Returnx,_,_->Okx,labels,list_of_reverted_columns|Bind(x,f),_,_->(letr,labels,list_of_reverted_columns=runx~labels~list_of_reverted_columnsinmatchrwith|Okx->run(fx)~labels~list_of_reverted_columns|Errore->Errore,labels,list_of_reverted_columns)|Column_,[],_|Column_,_,[]->Error`Not_enough_columns,labels,list_of_reverted_columns|Column(col_label,col_conv),label::labels,rev_col::list_of_reverted_columns->letres=ifnot(String.equalcol_labellabel)thenError(`Unexpected_label(label,col_label))elsecol_convrev_colinres,labels,list_of_reverted_columnsendletfrom_file_parse~headerfnp=letopenLet_syntax.Resultinletheader=ifheaderthen`Read_in_fileelse`Noneinlet*res,labels,_=from_file_gen~headerfn(fun~nrows:_~ncols:_~labels~list_of_reverted_columns->Parser.run~labels~list_of_reverted_columnsp)inmatchres,labelswith|Ok_,_::_->Error`Too_many_columns|Ok_,_|Error_,_->reslet%expect_test"Dataframe.Parser.from_file_parse"=from_file_parse~header:true"../data/survival.tsv"Parser.(let*replicate=strings"replicate"inlet+nsurv=ints"Nsurv"in(replicate,nsurv))|>[%derive.show:(stringarray*intarray,Parser.error)result]|>print_endlinelet%expect_test"Dataframe.Parser.from_file_parse"=from_file_parse~header:true"../data/survival.tsv"Parser.(let*replicate=strings"replicates"inlet+nsurv=ints"Nsurvz"inreplicate,nsurv)|>[%derive.show:(stringarray*intarray,Parser.error)result]|>print_endlineexceptionErrorofstringmoduleEz=structletfrom_file?headerpath=Rresult.R.failwith_error_msg(from_file?headerpath)leterrormsg=raise(Errormsg)leterrorffmt=Printf.ksprintferrorfmtletby_intgen_func=gen_func~f:get_col~string_of_id:string_of_intletby_namegen_func=gen_func~f:get_col_by_name~string_of_id:Fn.idletget_ints_gen~f~string_of_iddfid=matchfdfidwith|Some(Intsxs)->xs|Some_->errorf"Column %s is not integer"(string_of_idid)|None->errorf"No column %s"(string_of_idid)letget_ints=by_intget_ints_genletget_ints_by_name=by_nameget_ints_genletget_int_opts_gen~f~string_of_iddfid=matchfdfidwith|Some(Int_optsxs)->xs|Some_->errorf"Column %s is not integer with options"(string_of_idid)|None->errorf"No column %s"(string_of_idid)letget_int_opts=by_intget_int_opts_genletget_int_opts_by_name=by_nameget_int_opts_genletget_floats_gen~f~string_of_iddfid=matchfdfidwith|Some(Floatsxs)->xs|Some_->errorf"Column %s is not float"(string_of_idid)|None->errorf"No column %s"(string_of_idid)letget_floats=by_intget_floats_genletget_floats_by_name=by_nameget_floats_genletget_float_opts_gen~f~string_of_iddfid=matchfdfidwith|Some(Float_optsxs)->xs|Some_->errorf"Column %s is not float with options"(string_of_idid)|None->errorf"No column %s"(string_of_idid)letget_float_opts=by_intget_float_opts_genletget_float_opts_by_name=by_nameget_float_opts_genletget_strings_gen~f~string_of_iddfid=matchfdfidwith|Some(Stringsxs)->xs|Some_->errorf"Column %s is not string"(string_of_idid)|None->errorf"No column %s"(string_of_idid)letget_strings=by_intget_strings_genletget_strings_by_name=by_nameget_strings_genletget_string_opts_gen~f~string_of_iddfid=matchfdfidwith|Some(String_optsxs)->xs|Some_->errorf"Column %s is not string with options"(string_of_idid)|None->errorf"No column %s"(string_of_idid)letget_string_opts=by_intget_string_opts_genletget_string_opts_by_name=by_nameget_string_opts_genendtypehtml_formatter=int->string->Html_types.td_contentTyxml.Html.eltletto_html?(formatters=[])d=letopenTyxml.Htmlinletdefault_cell_s=txtsinletcols=List.mapd.cols~f:(fun(label,col)->letcell_renderer=List.Assoc.findformatters~equal:String.equallabel|>Option.value~default:default_cellinlabel,cell_renderer,col)inletthead=thead[tr(List.mapcols~f:(fun(label,_,_)->td[txtlabel]))]inletelemcoli=letdefault="NA"inmatchcolwith|Intst->Int.to_stringt.(i)|Int_optst->Option.value_map~default~f:Int.to_stringt.(i)|Floatst->Float.to_stringt.(i)|Float_optst->Option.value_map~default~f:Float.to_stringt.(i)|Stringst->t.(i)|String_optst->Option.value~defaultt.(i)inletrowi=List.mapcols~f:(fun(_,renderer,col)->td[rendereri(elemcoli)])|>trintable~thead(List.init(nrowsd)~f:row)