Comparing Methods for Estimating Marginal Likelihood in Symbolic Regression
Created by W.Langdon from
gp-bibliography.bib Revision:1.7970
% GECCO '24 conference paper comparing the Laplace approximation and
% sequential Monte Carlo as estimators of marginal likelihood for
% genetic programming-based symbolic regression.
% The DOI is stored bare (no "doi:" prefix) so styles can build the link.
% organisation, publisher_address, isbn13, size and notes are not standard
% BibTeX field names; they are kept exactly as the source database names them.
@InProceedings{leser:2024:GECCOcomp,
  author =       "Patrick Leser and Geoffrey Bomarito and
                 Gabriel Kronberger and Fabricio {Olivetti De Franca}",
  title =        "Comparing Methods for Estimating Marginal Likelihood
                 in Symbolic Regression",
  booktitle =    "Symbolic Regression",
  year =         "2024",
  editor =       "William {La Cava} and Steven Gustafson",
  pages =        "2058--2066",
  address =      "Melbourne, Australia",
  series =       "GECCO '24",
  month =        "14-18 " # jul,
  organisation = "SIGEVO",
  publisher =    "Association for Computing Machinery",
  publisher_address = "New York, NY, USA",
  keywords =     "genetic algorithms, genetic programming, symbolic
                 regression, model selection, equation learning,
                 marginal likelihood",
  isbn13 =       "979-8-4007-0495-6",
  DOI =          "10.1145/3638530.3664142",
  size =         "9 pages",
  abstract =     "Marginal likelihood has been proposed as a genetic
                 programming-based symbolic regression (GPSR) fitness
                 metric to prevent overly complex expressions and
                 overfitting, particularly when data is limited and
                 noisy. Here, two particular methods for estimating
                 marginal likelihood - the Laplace approximation and
                 sequential Monte Carlo - are studied with a focus on
                 tradeoffs between accuracy and computational
                 efficiency. The comparison focuses on practical
                 challenges in the context of two sets of example
                 problems. First, the methods are compared on
                 handcrafted expressions exhibiting nonlinearity and
                 multimodality in their respective posterior
                 distributions. Next, the methods are compared on a
                 real-world set of equations produced by GPSR using
                 training data from a well-known symbolic regression
                 benchmark. A key finding is that there are potentially
                 significant differences between the methods that, for
                 example, could lead to conflicting selection of
                 expressions within a GPSR implementation. However, it
                 is concluded that there are scenarios where either
                 method could be preferred over the other based on
                 accuracy or computational budget. Algorithmic
                 improvements for both methods as well as future areas
                 of study are discussed.",
  notes =        "GECCO-2024 SymReg A Recombination of the 33rd
                 International Conference on Genetic Algorithms (ICGA)
                 and the 29th Annual Genetic Programming Conference
                 (GP)",
}
Genetic Programming entries for
Patrick E Leser
Geoffrey F Bomarito
Gabriel Kronberger
Fabricio Olivetti de Franca
Citations