123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566openSearchspaceopenCollections.Utiltypedecision={chosen:int;choices:int}typerng=int->intlet(let*)=bindletrecrandom_walkrngspace=inspectspace|>function|Fail->([],None)|Resultx->([],Somex)|Forkchoices->letnum_choices=List.lengthchoicesinifnum_choices==0then([],None)elseifnum_choices==1thenletonly_choice=List.hdchoicesinrandom_walkrngonly_choiceelse(letchosen=rngnum_choicesinletchosen_el=List.nthchoiceschoseninlet(recursed_path,result)=random_walkrngchosen_elinletpath={chosen;choices=num_choices}::recursed_pathin(path,result))letdecision_to_string{chosen;choices}=Int.to_stringchosen^"/"^Int.to_stringchoicesletresult_to_stringto_string=function|Somex->"Found: "^to_stringx|None->"Failed"letwalk_to_stringto_string(path,result)="["^with_separatordecision_to_string", "path^"] => "^result_to_stringto_stringresultlet%expect_test"random_walk"=beginRandom.full_init[|0|];letsums=(let*n1=int_range15inlet*n2=int_range15inletsum=n1+n2in(* if sum mod 2==0 then *)return(Printf.sprintf"%d + %d = %d"n1n2sum)(* else
empty *))inletdo_testrng=letwalk=random_walkrngsumsin(Printf.printf"%s\n"(walk_to_stringFun.idwalk))in(Printf.printf"Always first: ";do_test(fun_->0);for_i=1to10doPrintf.printf("Random: ");do_testRandom.intdone;Printf.printf"Always last: ";do_test(funbound->bound-1));[%expect{|
Always first: [0/2, 0/2] => Found: 1 + 1 = 2
Random: [0/2, 0/2] => Found: 1 + 1 = 2
Random: [1/2, 0/2, 0/2] => Found: 2 + 1 = 3
Random: [0/2, 0/2] => Found: 1 + 1 = 2
Random: [1/2, 0/2, 1/2, 1/2, 1/2, 0/2] => Found: 2 + 4 = 6
Random: [0/2, 1/2, 1/2, 1/2, 1/2, 1/2] => Failed
Random: [0/2, 0/2] => Found: 1 + 1 = 2
Random: [1/2, 0/2, 0/2] => Found: 2 + 1 = 3
Random: [0/2, 1/2, 0/2] => Found: 1 + 2 = 3
Random: [0/2, 1/2, 1/2, 0/2] => Found: 1 + 3 = 4
Random: [0/2, 1/2, 1/2, 1/2, 0/2] => Found: 1 + 4 = 5
Always last: [1/2, 1/2, 1/2, 1/2, 1/2] => Failed |}]endlet%expect_test"decision tree for sums divisible by 7"=letopenSearchspaceinletpp=pp_decision_tree(Format.pp_print_string)Format.std_formatterinletsums_div7=let*n1=int_range14inlet*n2=int_range15inletsum=n1+n2inifsummod7=0thenreturn(Printf.sprintf"%d + %d = %d"n1n2sum)elseemptyinbeginsums_div7|>ppend;[%expect{|
choices
choices
FAIL
choices
FAIL
choices
FAIL
choices
FAIL
choices
FAIL
FAIL
choices
choices
FAIL
choices
FAIL
choices
FAIL
choices
FAIL
choices
2 + 5 = 7
FAIL
choices
choices
FAIL
choices
FAIL
choices
FAIL
choices
3 + 4 = 7
choices
FAIL
FAIL
choices
choices
FAIL
choices
FAIL
choices
4 + 3 = 7
choices
FAIL
choices
FAIL
FAIL
FAIL
|}]typestats={nodes:int;forks:int;fails:int;solutions:int;}letreccalculate_true_valuesspace=inspectspace|>function|Result_->{nodes=1;forks=0;solutions=1;fails=0}|Fail->{nodes=1;forks=0;solutions=0;fails=1}|Forkchoices->letchildren_stats=List.mapcalculate_true_valueschoicesinletnodes=1+List.fold_left(funaccs->acc+s.nodes)0children_statsinletforks=1+List.fold_left(funaccs->acc+s.forks)0children_statsinletsolutions=List.fold_left(funaccs->acc+s.solutions)0children_statsinletfails=List.fold_left(funaccs->acc+s.fails)0children_statsin{nodes;forks;solutions;fails}letsums_div7=let*n1=int_range14inlet*n2=int_range15inletsum=n1+n2inifsummod7=0thenreturn(Printf.sprintf"%d + %d = %d"n1n2sum)elseemptytype'anode={node_view:'aSearchspace.node_view;(* Cached inspected view of the searchspace *)mutablechildren:'anodeoptionarray;(* Children indexed by decision number; only some may be materialized *)mutablesamples:int;(* Number of samples passing through this node *)mutablenodes_estimate:float;(* Current best estimate for subtree size *)mutablefail_estimate:float;(* Final estimate for failures in this subtree *)mutablesolution_estimate:float;(* Final estimate for solutions in this subtree *)}letchild_average(children:'anodeoptionarray)(f:'anode->float):float=letmaterialized=Array.to_listchildren|>List.filter_map(func->c)inletavg=matchmaterializedwith|[]->1.0|xs->List.fold_left(+.)0.(List.mapfxs)/.float_of_int(List.lengthxs)inavgletchildren_estimate(children:'anodeoptionarray)(f:'anode->float):float=float_of_int(Array.lengthchildren)*.child_averagechildrenfletnum_choicesnode_view=matchnode_viewwith|Forkchoices->List.lengthchoices|_->0letcreate_node(space:'aSearchspace.t):'anode=letnode_view=inspectspaceinlet(nodes_estimate,fail_estimate,solution_estimate)=matchnode_viewwith|Result_->(1.0,0.0,1.0)|Fail->(1.0,1.0,0.0)|Fork_->(1.0,0.0,0.0)(* initial values for forks, will be updated by sampling *)in{node_view;children=Array.make(num_choicesnode_view)None;samples=0;nodes_estimate;fail_estimate;solution_estimate;}type'achild_selector='anode->intletuniform_selectornode=Random.int(Array.lengthnode.children)letsample_rate=function|Somechild->float_of_intchild.samples/.(child.fail_estimate+.child.solution_estimate)|None->0.0letundersampled_selector(node:'anode):int=letn=Array.lengthnode.childreninifn=0then0elseletrates=Array.initn(funi->sample_ratenode.children.(i))inletmin_rate=Array.fold_leftminrates.(0)ratesinletcandidates=List.filter(funi->abs_float(rates.(i)-.min_rate)<1e-8)(List.initnFun.id)inList.nthcandidates(Random.int(List.lengthcandidates))letweighted_selector(node:'anode):int=letn=Array.lengthnode.childreninifn=0then0elseletmaterialized=Array.to_listnode.children|>List.filter_map(func->c)inletavg=matchmaterializedwith|[]->1.0|xs->List.fold_left(+.)0.(List.map(func->c.nodes_estimate)xs)/.float_of_int(List.lengthxs)inletweights=Array.initn(funi->matchnode.children.(i)with|Somechild->maxchild.nodes_estimate1.0|None->avg)inlettotal=Array.fold_left(+.)0.0weightsinletr=Random.floattotalinletrecpickiacc=ifi>=nthenn-1elseifacc+.weights.(i)>=rthenielsepick(i+1)(acc+.weights.(i))inpick00.0letrecwalkselect_child(node:'anode):unit=node.samples<-node.samples+1;matchnode.node_viewwith|Fail|Result_->()|Forkchoices->letnum_choices=Array.lengthnode.childreninifnum_choices>0then(letchosen=select_childnodeinletchild_node=matchnode.children.(chosen)with|Somechild->child|None->letc=create_node(List.nthchoiceschosen)innode.children.(chosen)<-Somec;cinwalkselect_childchild_node;node.nodes_estimate<-1.+.children_estimatenode.children(funchild->child.nodes_estimate);node.fail_estimate<-children_estimatenode.children(funchild->child.fail_estimate);node.solution_estimate<-children_estimatenode.children(funchild->child.solution_estimate);)typeestimates={nodes:float;fails:float;solutions:float;materialized_nodes:int;}letreccount_materialized_nodes(node:'anode):int=matchnode.node_viewwith|Fork_->1+Array.fold_left(funaccchild_opt->matchchild_optwith|Somechild->acc+count_materialized_nodeschild|None->acc)0node.children|_->1letestimate?(selector=undersampled_selector)n_trials(space:'aSearchspace.t):estimates=letroot=create_nodespaceinfor_=1ton_trialsdowalkselectorrootdone;{nodes=root.nodes_estimate;fails=root.fail_estimate;solutions=root.solution_estimate;materialized_nodes=count_materialized_nodesroot;}let%expect_test"estimate number of nodes"=lettrue_values=calculate_true_valuessums_div7inPrintf.printf"True values\n";Printf.printf" number of nodes: %d\n"true_values.nodes;Printf.printf" number of fails: %d\n"true_values.fails;Printf.printf" number of solutions: %d\n"true_values.solutions;Printf.printf"\n";letestimates=estimate1000sums_div7inPrintf.printf"Estimated\n";Printf.printf" materialized nodes: %d\n"estimates.materialized_nodes;Printf.printf" number of nodes: %d\n"(int_of_float(estimates.nodes+.0.5));Printf.printf" number of fails: %d\n"(int_of_float(estimates.fails+.0.5));Printf.printf" number of solutions: %d\n"(int_of_float(estimates.solutions+.0.5));[%expect{|
True values
number of nodes: 49
number of fails: 22
number of solutions: 3
Estimated
materialized nodes: 49
number of nodes: 49
number of fails: 22
number of solutions: 3
|}]letrecbalanced_rangestartstop=ifstart>stopthenemptyelseifstart=stopthenreturnstartelseifstart+1=stopthenreturnstart++returnstopelseletmid=(start+stop)/2inbalanced_rangestartmid++balanced_range(mid+1)stoplet%expect_test"undersampling larger balanced searchspace"=letint_range=balanced_rangeinletright_heavy_space=(let*n1=int_range1100inlet*n2=int_range1100inletsum=return(n1+n2)insum|?>(funx->xmod7=0))inlettrue_values=calculate_true_valuesright_heavy_spaceinPrintf.printf"True values\n";Printf.printf" number of nodes: %d\n"true_values.nodes;Printf.printf" number of fails: %d\n"true_values.fails;Printf.printf" number of solutions: %d\n"true_values.solutions;Printf.printf"\n";forsamplers=1to5doletsamples=1000*samplersinPrintf.printf"Sample run %d:\n"samples;letestimates=estimatesamplesright_heavy_spaceinPrintf.printf"Estimated values balanced trees:\n";Printf.printf" materialized nodes: %d\n"estimates.materialized_nodes;Printf.printf" number of nodes: %d\n"(int_of_float(estimates.nodes+.0.5));Printf.printf" number of fails: %d\n"(int_of_float(estimates.fails+.0.5));Printf.printf" number of solutions: %d\n"(int_of_float(estimates.solutions+.0.5));Printf.printf"\n";done;[%expect{|
True values
number of nodes: 19999
number of fails: 8572
number of solutions: 1428
Sample run 1000:
Estimated values balanced trees:
materialized nodes: 5143
number of nodes: 19007
number of fails: 8228
number of solutions: 1276
Sample run 2000:
Estimated values balanced trees:
materialized nodes: 8301
number of nodes: 19187
number of fails: 8278
number of solutions: 1316
Sample run 3000:
Estimated values balanced trees:
materialized nodes: 10568
number of nodes: 18661
number of fails: 8013
number of solutions: 1318
Sample run 4000:
Estimated values balanced trees:
materialized nodes: 12268
number of nodes: 18439
number of fails: 7984
number of solutions: 1236
Sample run 5000:
Estimated values balanced trees:
materialized nodes: 13598
number of nodes: 17291
number of fails: 7448
number of solutions: 1198
|}]let%expect_test"undersampling larger unbalanced searchspace"=letright_heavy_space=(let*n1=int_range1100inlet*n2=int_range1100inletsum=return(n1+n2)insum|?>(funx->xmod7=0))inlettrue_values=calculate_true_valuesright_heavy_spaceinPrintf.printf"True values\n";Printf.printf" number of nodes: %d\n"true_values.nodes;Printf.printf" number of fails: %d\n"true_values.fails;Printf.printf" number of solutions: %d\n"true_values.solutions;Printf.printf"\n";forsamplers=1to5doletsamples=1000*samplersinPrintf.printf"Sample run %d:\n"samples;letestimates=estimatesamplesright_heavy_spaceinPrintf.printf"Estimated values (unbalanced trees):\n";Printf.printf" materialized nodes: %d\n"estimates.materialized_nodes;Printf.printf" number of nodes: %d\n"(int_of_float(estimates.nodes+.0.5));Printf.printf" number of fails: %d\n"(int_of_float(estimates.fails+.0.5));Printf.printf" number of solutions: %d\n"(int_of_float(estimates.solutions+.0.5));Printf.printf"\n";done;[%expect{|
True values
number of nodes: 20201
number of fails: 8673
number of solutions: 1428
Sample run 1000:
Estimated values (unbalanced trees):
materialized nodes: 2099
number of nodes: 2199
number of fails: 946
number of solutions: 154
Sample run 2000:
Estimated values (unbalanced trees):
materialized nodes: 4100
number of nodes: 4203
number of fails: 1798
number of solutions: 304
Sample run 3000:
Estimated values (unbalanced trees):
materialized nodes: 6100
number of nodes: 6203
number of fails: 2651
number of solutions: 451
Sample run 4000:
Estimated values (unbalanced trees):
materialized nodes: 8099
number of nodes: 8199
number of fails: 3514
number of solutions: 586
Sample run 5000:
Estimated values (unbalanced trees):
materialized nodes: 10099
number of nodes: 10199
number of fails: 4378
number of solutions: 722
|}](** Incremental estimator API implementation *)type'at={root:'anode;selector:'achild_selector;}letcreate?(selector=undersampled_selector)(space:'aSearchspace.t):'at={root=create_nodespace;selector}letsamplen(est:'at):unit=for_=1tondowalkest.selectorest.rootdoneletestimates(est:'at):estimates={nodes=est.root.nodes_estimate;fails=est.root.fail_estimate;solutions=est.root.solution_estimate;materialized_nodes=count_materialized_nodesest.root;}let%expect_test"incremental estimator API on unbalanced searchspace"=letright_heavy_space=(let*n1=int_range1100inlet*n2=int_range1100inletsum=return(n1+n2)insum|?>(funx->xmod7=0))inlettrue_values=calculate_true_valuesright_heavy_spaceinPrintf.printf"True values\n";Printf.printf" number of nodes: %d\n"true_values.nodes;Printf.printf" number of fails: %d\n"true_values.fails;Printf.printf" number of solutions: %d\n"true_values.solutions;Printf.printf"\n";letest=createright_heavy_spaceinforsamplers=1to5doletsamples=1000*samplersinsample1000est;Printf.printf"Sample run %d:\n"samples;letestimates=estimatesestinPrintf.printf"Estimated values (incremental):\n";Printf.printf" materialized nodes: %d\n"estimates.materialized_nodes;Printf.printf" number of nodes: %d\n"(int_of_float(estimates.nodes+.0.5));Printf.printf" number of fails: %d\n"(int_of_float(estimates.fails+.0.5));Printf.printf" number of solutions: %d\n"(int_of_float(estimates.solutions+.0.5));Printf.printf"\n";done;[%expect{|
True values
number of nodes: 20201
number of fails: 8673
number of solutions: 1428
Sample run 1000:
Estimated values (incremental):
materialized nodes: 2099
number of nodes: 2199
number of fails: 946
number of solutions: 154
Sample run 2000:
Estimated values (incremental):
materialized nodes: 4101
number of nodes: 4211
number of fails: 1798
number of solutions: 308
Sample run 3000:
Estimated values (incremental):
materialized nodes: 6099
number of nodes: 6199
number of fails: 2665
number of solutions: 435
Sample run 4000:
Estimated values (incremental):
materialized nodes: 8099
number of nodes: 8199
number of fails: 3515
number of solutions: 585
Sample run 5000:
Estimated values (incremental):
materialized nodes: 10099
number of nodes: 10199
number of fails: 4372
number of solutions: 728
|}]