A Continuous Estimation of Distribution Algorithm by Evolving Graph Structures Using Reinforcement Learning
Created by W.Langdon from
gp-bibliography.bib Revision:1.7906
@comment{Li:2012:CECc: conference paper record; the notes field ties it to WCCI 2012 / CEC 2012.
  The DOI field holds the bare identifier (no doi: prefix); bibliography styles add the resolver prefix.
  NOTE(review): the ISBN is in 10-digit form, which is unusual for a 2012 volume. Verify against the proceedings.}
@InProceedings{Li:2012:CECc,
  title =        "A Continuous Estimation of Distribution Algorithm by
                 Evolving Graph Structures Using Reinforcement
                 Learning",
  author =       "Xianneng Li and Bing Li and Shingo Mabu and
                 Kotaro Hirasawa",
  pages =        "2097--2104",
  booktitle =    "Proceedings of the 2012 {IEEE} Congress on Evolutionary
                 Computation",
  year =         "2012",
  editor =       "Xiaodong Li",
  month =        "10-15 " # jun,
  DOI =          "10.1109/CEC.2012.6256481",
  address =      "Brisbane, Australia",
  ISBN =         "0-7803-8515-2",
  keywords =     "genetic algorithms, genetic programming, Genetic
                 Network Programming, Estimation of distribution
                 algorithms, Adaptive dynamic programming and
                 reinforcement learning, Representation and operators",
  abstract =     "A novel graph-based Estimation of Distribution
                 Algorithm (EDA) named Probabilistic Model Building
                 Genetic Network Programming (PMBGNP) has been proposed.
                 Inspired by classical EDAs, PMBGNP memorises the
                 current best individuals and uses them to estimate a
                 distribution for the generation of the new population.
                 However, PMBGNP can evolve compact programs by
                 representing its solutions as graph structures.
                 Therefore, it can solve a range of problems different
                 from conventional ones in EDA literature, such as data
                 mining and Reinforcement Learning (RL) problems. This
                 paper extends PMBGNP from discrete to continuous search
                 space, which is named PMBGNP-AC. Besides evolving the
                 node connections to determine the optimal graph
                 structures using conventional PMBGNP, Gaussian
                 distribution is used for the distribution of continuous
                 variables of nodes. The mean value mu and standard
                 deviation sigma are constructed like those of classical
                 continuous Population-based incremental learning
                 (PBILc). However, a RL technique, i.e., Actor-Critic
                 (AC), is designed to update the parameters (mu and
                 sigma). AC allows us to calculate the
                 Temporal-Difference (TD) error to evaluate whether the
                 selection of the continuous value is better or worse
                 than expected. This scalar reinforcement signal can
                 decide whether the tendency to select this continuous
                 value should be strengthened or weakened, allowing us
                 to determine the shape of the probability density
                 functions of the Gaussian distribution. The proposed
                 algorithm is applied to a RL problem, i.e., autonomous
                 robot control, where the robot's wheel speeds and
                 sensor values are continuous. The experimental results
                 show the superiority of PMBGNP-AC comparing with the
                 conventional algorithms.",
  notes =        "WCCI 2012. CEC 2012 - A joint meeting of the IEEE, the
                 EPS and the IET.",
}
Genetic Programming entries for
Xianneng Li
Bing Li
Shingo Mabu
Kotaro Hirasawa
Citations