title = "An Evolutionary Algorithm Approach for Feature
Generation from Sequence Data and Its Application to
{DNA} Splice Site Prediction",
journal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
year = "2012",
volume = "9",
number = "5",
pages = "1387--1398",
month = sep # "/" # oct,
keywords = "genetic algorithms, genetic programming, Evolutionary
computation, feature extraction and construction,
classifier design and evaluation, data mining, DNA
splice sites",
ISSN = "1545-5963",
DOI = "10.1109/TCBB.2012.53",
size = "12 pages",
abstract = "Associating functional information with biological
sequences remains a challenge for machine learning
methods. The performance of these methods often depends
on deriving predictive features from the sequences
sought to be classified. Feature generation is a
difficult problem, as the connection between the
sequence features and the sought property is not known
a priori. It is often the task of domain experts or
exhaustive feature enumeration techniques to generate a
few features whose predictive power is then tested in
the context of classification. This paper proposes an
evolutionary algorithm to effectively explore a large
feature space and generate predictive features from
sequence data. The effectiveness of the algorithm is
demonstrated on an important component of the
gene-finding problem, DNA splice site prediction. This
application is chosen due to the complexity of the
features needed to obtain high classification accuracy
and precision. Our results test the effectiveness of
the obtained features in the context of classification
by Support Vector Machines and show significant
improvement in accuracy and precision over
state-of-the-art approaches.",