Biomarker Modelling in Omics Technologies Using Symbolic Regression
Created by W.Langdon from
gp-bibliography.bib Revision:1.8506
@InProceedings{rojas-velazquez:2025:GECCOcomp,
  author =       "David Eduardo Rojas-Velazquez and Alberto Tonda and
                 Alejandro Lopez-Rincon",
  title =        "Biomarker Modelling in Omics Technologies Using
                 Symbolic Regression",
  booktitle =    "Proceedings of the 2025 Genetic and Evolutionary
                 Computation Conference Companion",
  year =         "2025",
  editor =       "Roman Kalkreuth and Alexander Brownlee",
  pages =        "895--898",
  address =      "Malaga, Spain",
  series =       "GECCO '25 Companion",
  month =        "14-18 " # jul,
  organisation = "SIGEVO",
  publisher =    "Association for Computing Machinery",
  publisher_address = "New York, NY, USA",
  keywords =     "genetic algorithms, genetic programming,
                 bioinformatics, feature selection, Real World
                 Applications: Poster",
  isbn13 =       "979-8-4007-1464-1",
  URL =          "https://doi.org/10.1145/3712255.3726746",
  DOI =          "10.1145/3712255.3726746",
  size =         "4 pages",
  abstract =     "Omics data can contain predictive information of the
                 onset of diseases and chronic conditions. Applying
                 machine learning (ML) techniques to omics data is a
                 promising venue of research, but domain data sets are
                 typically high-dimensional and low-sample-size,
                 presenting significant challenges to classic ML
                 approaches. Another obstacle is the black-box nature of
                 many ML algorithms, which prevents them from being
                 deployed in medical practice. Symbolic regression (SR)
                 is a possible solution to obtain human-interpretable
                 models; but even equations cannot be easily understood,
                 if they include hundreds or thousands of features.
                 While feature selection can help reducing the number of
                 features to be considered, most algorithms make
                 unrealistic assumptions or bias the selection using a
                 single classifier. In this work, we apply the Recursive
                 Ensemble Feature Selection (REFS) algorithm, designed
                 to avoid over-relying on a single ML model, with a
                 modern SR algorithm, to obtain interpretable models
                 predictive for different diseases, starting from
                 real-world omics data. Experimental results for five
                 different omics studies show that the completely
                 open-source approach is competitive with the
                 state-of-the-art in closed-source software. Comparing
                 the same pipeline with REFS and more classic feature
                 selection techniques shows that models created with
                 REFS have a better performance.",
  notes =        "GECCO-2025 RWA A Recombination of the 34th
                 International Conference on Genetic Algorithms (ICGA)
                 and the 30th Annual Genetic Programming Conference
                 (GP)",
}
Genetic Programming entries for
David Eduardo Rojas-Velazquez
Alberto Tonda
Alejandro Lopez-Rincon
Citations