Finite State Environment Benchmark

Run Parameters
n_runs: 25
n_epochs: 100
n_steps: 100
n_steps_test: 1000

GridWorld

DoubleQLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
QLambda:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  lambda_coeff: 0.9
  learning_rate: ExponentialParameter
  trace: replacing
QLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
SARSA:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
SARSALambda:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  lambda_coeff: 0.9
  learning_rate: ExponentialParameter
  trace: replacing
SpeedyQLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
WeightedQLearning:
  decay_eps: 0.5
  decay_lr: 0.8
  epsilon: ExponentialParameter
  epsilon_test: 0.0
  learning_rate: ExponentialParameter
  precision: 1000
  sampling: true
![J14](../../../_images/J14.png)
![R14](../../../_images/R14.png)