Module Fehu_envs.Mountain_car

Mountain car environment - drive up a steep hill using momentum.

ID: MountainCar-v0

Observation Space: Fehu.Space.Box with shape [2], holding the car's position and velocity.

Action Space: Fehu.Space.Discrete with 3 choices:

Rewards: -1.0 for each step until the goal is reached

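Episode Termination:

- Terminated: the car reaches the goal
- Truncated: the episode reaches 200 steps without reaching the goal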

Initial State: Random position in [-0.6, -0.4] with velocity 0.0

Rendering: ASCII visualization showing car position ('C') and goal ('G') on a track

Example

Train an agent to reach the goal by building momentum:

  let rng = Rune.Rng.create () in
  let env = Fehu_envs.Mountain_car.make ~rng () in
  let obs, _ = Fehu.Env.reset env () in
  let rec run_episode steps =
    let action = (* policy decision based on position and velocity *) in
    let t = Fehu.Env.step env action in
    if t.terminated then
      Printf.printf "Goal reached in %d steps!\n" steps
    else if t.truncated then
      Printf.printf "Failed to reach goal in 200 steps\n"
    else
      run_episode (steps + 1)
  in
  run_episode 0
Tips
type observation = (float, Rune.float32_elt) Rune.t
type action = (int32, Rune.int32_elt) Rune.t
type render = string
type state = {
  mutable position : float;
  mutable velocity : float;
  mutable steps : int;
  rng : Rune.Rng.key ref;
}
val min_position : float
val max_position : float
val max_speed : float
val goal_position : float
val goal_velocity : float
val force : float
val gravity : float
val observation_space : Fehu.Space.Box.element Fehu__Space.t
val action_space : Fehu.Space.Discrete.element Fehu__Space.t
val metadata : Fehu.Metadata.t
val reset : 'a -> ?options:'b -> unit -> state -> (float, Rune.float32_elt) Rune.t * Fehu.Info.t
val step : 'a -> (Int32.t, 'b) Rune.t -> state -> ((float, Rune.float32_elt) Rune.t, 'c, 'd) Fehu.Env.transition
val render : state -> string