(** Half moons synthetic dataset generation *)

open Bigarray

(** Configuration for the half moons dataset *)
module Config = struct
  type t = {
    noise_range : float;
        (** Range of noise to add to the coordinates: the generators draw each noise term with
            [Random.float_range] between [-. noise_range] and [noise_range]. *)
    seed : int option;
        (** Optional random seed for reproducibility; when [None] the generator is not re-seeded. *)
  }

  (** Default configuration: a noise range of [0.1] and no fixed seed. *)
  let default = { noise_range = 0.1; seed = None }
end

(* Random source shared by every generator in this file. *)
module Random = Rand.Random_for_tests

(** Internal helper function to generate half moons with specified precision.
@param kind The bigarray kind (float32 or float64)
@param config Configuration for noise and randomization
@param len Number of samples per moon (total samples = len * 2)
@return
  A tuple of (coordinates, labels) where:
  - coordinates is a bigarray of shape [len*2; 2] (batch_axis, output_axis)
  - labels is a bigarray of shape [len*2; 1] (batch_axis, output_axis)
  - First moon has label 1.0, second moon has label -1.0
  - The two moons are interleaved: row [2*i] holds sample [i] of the first moon and row
    [2*i + 1] holds sample [i] of the second moon
@raise Invalid_argument if [len] is negative (raised by [Genarray.create]) *)
let generate_with_kind kind ?(config = Config.default) ~len () =
  (* Initialize random seed if specified *)
  (match config.seed with Some seed -> Random.init seed | None -> ());
  let noise () = Random.float_range ~-.(config.noise_range) config.noise_range in
  let total_samples = len * 2 in
  (* Create bigarrays with batch axis first, then output axis *)
  let coordinates = Genarray.create kind c_layout [| total_samples; 2 |] in
  let labels = Genarray.create kind c_layout [| total_samples; 1 |] in
  (* Fill one moon. [place] maps the unit-circle point (cos v, sin v) to the noiseless position
     of the sample; [offset] selects the even (0) or odd (1) rows. *)
  let fill_moon ~offset ~label ~place =
    for i = 0 to len - 1 do
      let v = Float.of_int i *. Float.pi /. Float.of_int len in
      let base_x, base_y = place (Float.cos v) (Float.sin v) in
      (* Sequential bindings on purpose: the noise for x is drawn before the noise for y, so a
         seeded run does not depend on the unspecified evaluation order of [let ... and ...]. *)
      let x = base_x +. noise () in
      let y = base_y +. noise () in
      let idx = (i * 2) + offset in
      Genarray.set coordinates [| idx; 0 |] x;
      Genarray.set coordinates [| idx; 1 |] y;
      Genarray.set labels [| idx; 0 |] label
    done
  in
  (* The first moon is generated completely before the second one, which keeps the order of the
     random draws: all first-moon noise, then all second-moon noise. *)
  (* Generate first moon (label = 1.0) *)
  fill_moon ~offset:0 ~label:1.0 ~place:(fun c s -> (c, s));
  (* Generate second moon (label = -1.0) *)
  fill_moon ~offset:1 ~label:(-1.0) ~place:(fun c s -> (1.0 -. c, 0.5 -. s));
  (coordinates, labels)

(** Generate the half moons dataset with the specified parameters.
@param config Configuration for noise and randomization (defaults to [Config.default])
@param len Number of samples per moon (total samples = len * 2)
@return
  A tuple of (coordinates, labels) where:
  - coordinates is a bigarray of shape [len*2; 2] (batch_axis, output_axis) with float64
    elements
  - labels is a bigarray of shape [len*2; 1] (batch_axis, output_axis) with float64 elements
  - First moon has label 1.0, second moon has label -1.0 *)
let generate ?config ~len () =
  (* The optional argument is forwarded untouched; [generate_with_kind] applies the same
     [Config.default] when it is absent. *)
  generate_with_kind float64 ?config ~len ()

(** Generate the half moons dataset with single precision floats.
@param config Configuration for noise and randomization (defaults to [Config.default])
@param len Number of samples per moon (total samples = len * 2)
@return
  A tuple of (coordinates, labels) where:
  - coordinates is a bigarray of shape [len*2; 2] (batch_axis, output_axis) with float32
    elements
  - labels is a bigarray of shape [len*2; 1] (batch_axis, output_axis) with float32 elements
  - First moon has label 1.0, second moon has label -1.0 *)
let generate_single_prec ?config ~len () =
  (* Same forwarding as the double-precision entry point, only the bigarray kind differs. *)
  generate_with_kind float32 ?config ~len ()

(** Generate half moons dataset using the old array-based approach for compatibility. This function
is deprecated and kept only for backwards compatibility. Unlike the bigarray generators, each
sample pair is placed at an angle step drawn with [Random.int len] rather than at consecutive
steps.
@param len Number of samples per moon
@param noise_range Range of noise to add (defaults to [0.1])
@return
  A tuple of (coordinates_array, labels_array) as flat arrays: the coordinates array has
  [len * 4] entries laid out as x, y of a first-moon point followed by x, y of a second-moon
  point; the labels array has [len * 2] entries alternating 1.0 and -1.0
@deprecated Retained for backwards compatibility. *)
let generate_arrays ?(noise_range = 0.1) ~len () =
  let jitter () = Random.float_range (-.noise_range) noise_range in
  (* Build the four coordinates of one first-moon / second-moon pair. *)
  let sample_pair _ =
    let step = Random.int len in
    let angle = Float.of_int step *. Float.pi /. Float.of_int len in
    let cos_a = Float.cos angle in
    let sin_a = Float.sin angle in
    [| cos_a +. jitter (); sin_a +. jitter (); 1.0 -. cos_a +. jitter (); 0.5 -. sin_a +. jitter () |]
  in
  (* Flatten the per-sample chunks into a single array. *)
  let coordinates = Array.init len sample_pair |> Array.to_list |> Array.concat in
  (* Even positions belong to the first moon, odd positions to the second. *)
  let labels = Array.init (len * 2) (fun k -> match k mod 2 with 0 -> 1. | _ -> -1.) in
  (coordinates, labels)