Finite State Environment Benchmark

Run Parameters

n_runs: 25

n_epochs: 100

n_steps: 100

n_steps_test: 1000

GridWorld

DoubleQLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
QLambda:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  lambda_coeff: 0.9
  learning_rate: ExponentialParameter
  trace: replacing
QLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
SARSA:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
SARSALambda:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  lambda_coeff: 0.9
  learning_rate: ExponentialParameter
  trace: replacing
SpeedyQLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
WeightedQLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
  precision: 1000
  sampling: true
../../../_images/J14.png ../../../_images/R14.png