Source file mountain_car.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
open Fehu
type observation = (float, Rune.float32_elt) Rune.t
type action = (int32, Rune.int32_elt) Rune.t
type render = string
type state = {
mutable position : float;
mutable velocity : float;
mutable steps : int;
rng : Rune.Rng.key ref;
}
let min_position = -1.2
let max_position = 0.6
let max_speed = 0.07
let goal_position = 0.5
let goal_velocity = 0.0
let force = 0.001
let gravity = 0.0025
let observation_space =
Space.Box.create
~low:[| min_position; -.max_speed |]
~high:[| max_position; max_speed |]
let action_space = Space.Discrete.create 3
let metadata =
Metadata.default
|> Metadata.add_render_mode "ansi"
|> Metadata.with_description
(Some "Classic mountain car problem - drive up a steep hill")
|> Metadata.add_author "Fehu"
|> Metadata.with_version (Some "0.1.0")
let reset _env ?options:_ () state =
let keys = Rune.Rng.split !(state.rng) ~n:2 in
state.rng := keys.(0);
let r = Rune.Rng.uniform keys.(1) Rune.float32 [| 1 |] in
let v = (Rune.to_array r).(0) in
state.position <- -0.6 +. (v *. 0.2);
state.velocity <- 0.0;
state.steps <- 0;
let obs =
Rune.create Rune.float32 [| 2 |] [| state.position; state.velocity |]
in
(obs, Info.empty)
let step _env action state =
let action_value =
let arr : Int32.t array = Rune.to_array action in
Int32.to_int arr.(0)
in
let force_direction = float_of_int (action_value - 1) in
let velocity =
state.velocity +. (force_direction *. force)
-. (gravity *. cos (3.0 *. state.position))
in
let velocity = Float.max (-.max_speed) (Float.min velocity max_speed) in
let position = state.position +. velocity in
let position = Float.max min_position (Float.min position max_position) in
let velocity =
if position = min_position && velocity < 0.0 then 0.0 else velocity
in
state.position <- position;
state.velocity <- velocity;
state.steps <- state.steps + 1;
let terminated = position >= goal_position && velocity >= goal_velocity in
let truncated = state.steps >= 200 in
let reward = -1.0 in
let obs =
Rune.create Rune.float32 [| 2 |] [| state.position; state.velocity |]
in
let info = Info.set "steps" (Info.int state.steps) Info.empty in
Env.transition ~observation:obs ~reward ~terminated ~truncated ~info ()
let render state =
let normalized_pos =
(state.position -. min_position) /. (max_position -. min_position)
in
let car_pos = int_of_float (normalized_pos *. 40.0) in
let goal_pos =
int_of_float
((goal_position -. min_position) /. (max_position -. min_position) *. 40.0)
in
let track = Bytes.make 41 '-' in
Bytes.set track goal_pos 'G';
Bytes.set track (max 0 (min 40 car_pos)) 'C';
Printf.sprintf "MountainCar: [%s] pos=%.3f, vel=%.3f, steps=%d"
(Bytes.to_string track) state.position state.velocity state.steps
let make ~rng () =
let state = { position = 0.0; velocity = 0.0; steps = 0; rng = ref rng } in
Env.create ~id:"MountainCar-v0" ~metadata ~rng ~observation_space
~action_space
~reset:(fun env ?options () -> reset env ?options () state)
~step:(fun env action -> step env action state)
~render:(fun _ -> Some (render state))
~close:(fun _ -> ())
()