2023
Barbero-Aparicio, José A.; Olivares-Gil, Alicia; Díez-Pastor, José F.; García-Osorio, César
Deep learning and support vector machines for transcription start site identification Journal Article
In: PeerJ Computer Science, vol. 9, iss. e1340, 2023, ISSN: 2376-5992.
Abstract | Links | BibTeX | Tags: bioinformatics, Convolutional neural network, Deep learning, Long short-term memory, Machine learning, Support vector machines, transcription start site
@article{barbero-aparicio2023,
  title      = {Deep learning and support vector machines for transcription start site identification},
  author     = {Barbero-Aparicio, José A. and Olivares-Gil, Alicia and Díez-Pastor, José F. and García-Osorio, César},
  editor     = {Fernandez-Lozano, Carlos},
  journal    = {PeerJ Computer Science},
  volume     = {9},
  pages      = {e1340},
  year       = {2023},
  date       = {2023-04-17},
  urldate    = {2023-04-17},
  doi        = {10.7717/peerj-cs.1340},
  url        = {https://doi.org/10.7717/peerj-cs.1340},
  issn       = {2376-5992},
  abstract   = {Recognizing transcription start sites is key to gene identification. Several approaches have been employed in related problems such as detecting translation initiation sites or promoters, many of the most recent ones based on machine learning. Deep learning methods have been proven to be exceptionally effective for this task, but their use in transcription start site identification has not yet been explored in depth. Also, the very few existing works do not compare their methods to support vector machines (SVMs), the most established technique in this area of study, nor provide the curated dataset used in the study. The reduced amount of published papers in this specific problem could be explained by this lack of datasets. Given that both support vector machines and deep neural networks have been applied in related problems with remarkable results, we compared their performance in transcription start site predictions, concluding that SVMs are computationally much slower, and deep learning methods, specially long short-term memory neural networks (LSTMs), are best suited to work with sequences than SVMs. For such a purpose, we used the reference human genome GRCh38. Additionally, we studied two different aspects related to data processing: the proper way to generate training examples and the imbalanced nature of the data. Furthermore, the generalization performance of the models studied was also tested using the mouse genome, where the LSTM neural network stood out from the rest of the algorithms. To sum up, this article provides an analysis of the best architecture choices in transcription start site identification, as well as a method to generate transcription start site datasets including negative instances on any species available in Ensembl. We found that deep learning methods are better suited than SVMs to solve this problem, being more efficient and better adapted to long sequences and large amounts of data. We also create a transcription start site (TSS) dataset large enough to be used in deep learning experiments.},
  keywords   = {bioinformatics, Convolutional neural network, Deep learning, Long short-term memory, Machine learning, Support vector machines, transcription start site},
  pubstate   = {published},
  tppubtype  = {article}
}
2015
Santos, Pedro; Villa, Luisa F; Reñones, Anibal; Bustillo, Andrés; Maudes-Raedo, Jesús
An SVM-Based Solution for Fault Detection in Wind Turbines Journal Article
In: Sensors, vol. 15, no. 3, pp. 5627-5648, 2015, ISSN: 1424-8220.
Abstract | Links | BibTeX | Tags: Fault diagnosis, Neural networks, Support vector machines, wind turbines
@article{Santos2015,
  title      = {An {SVM}-Based Solution for Fault Detection in Wind Turbines},
  author     = {Santos, Pedro and Villa, Luisa F and Reñones, Anibal and Bustillo, Andrés and Maudes-Raedo, Jesús},
  journal    = {Sensors},
  volume     = {15},
  number     = {3},
  pages      = {5627--5648},
  year       = {2015},
  date       = {2015-03-09},
  doi        = {10.3390/s150305627},
  url        = {http://www.mdpi.com/1424-8220/15/3/5627},
  issn       = {1424-8220},
  abstract   = {Research into fault diagnosis in machines with a wide range of variable loads and speeds, such as wind turbines, is of great industrial interest. Analysis of the power signals emitted by wind turbines for the diagnosis of mechanical faults in their mechanical transmission chain is insufficient. A successful diagnosis requires the inclusion of accelerometers to evaluate vibrations. This work presents a multi-sensory system for fault diagnosis in wind turbines, combined with a data-mining solution for the classification of the operational state of the turbine. The selected sensors are accelerometers, in which vibration signals are processed using angular resampling techniques and electrical, torque and speed measurements. Support vector machines (SVMs) are selected for the classification task, including two traditional and two promising new kernels. This multi-sensory system has been validated on a test-bed that simulates the real conditions of wind turbines with two fault typologies: misalignment and imbalance. Comparison of SVM performance with the results of artificial neural networks (ANNs) shows that linear kernel SVM outperforms other kernels and ANNs in terms of accuracy, training and tuning times. The suitability and superior performance of linear SVM is also experimentally analyzed, to conclude that this data acquisition technique generates linearly separable datasets.},
  keywords   = {Fault diagnosis, Neural networks, Support vector machines, wind turbines},
  pubstate   = {published},
  tppubtype  = {article}
}
2014
Santos, Pedro; Teixidor, Daniel; Maudes-Raedo, Jesús; Ciurana, Joaquim
Modelling Laser Milling of Microcavities for the Manufacturing of DES with Ensembles Journal Article
In: Journal of Applied Mathematics, vol. 2014, Article ID 439091, 15 pp., 2014, ISSN: 1110-757X.
Abstract | Links | BibTeX | Tags: Ensemble methods, Laser milling, Neural networks, Support vector machines
@article{Santos2014,
  title      = {Modelling Laser Milling of Microcavities for the Manufacturing of {DES} with Ensembles},
  author     = {Santos, Pedro and Teixidor, Daniel and Maudes-Raedo, Jesús and Ciurana, Joaquim},
  journal    = {Journal of Applied Mathematics},
  volume     = {2014},
  pages      = {439091},
  pagetotal  = {15},
  year       = {2014},
  date       = {2014-04-17},
  doi        = {10.1155/2014/439091},
  url        = {https://www.hindawi.com/journals/jam/2014/439091/},
  issn       = {1110-757X},
  abstract   = {A set of designed experiments, involving the use of a pulsed Nd:YAG laser system milling 316L Stainless Steel, serve to study the laser-milling process of microcavities in the manufacture of drug-eluting stents (DES). Diameter, depth, and volume error are considered to be optimized as functions of the process parameters, which include laser intensity, pulse frequency, and scanning speed. Two different DES shapes are studied that combine semispheres and cylinders. Process inputs and outputs are defined by considering the process parameters that can be changed under industrial conditions and the industrial requirements of this manufacturing process. In total, 162 different conditions are tested in a process that is modeled with the following state-of-the-art data-mining regression techniques: Support Vector Regression, Ensembles, Artificial Neural Networks, Linear Regression, and Nearest Neighbor Regression. Ensemble regression emerged as the most suitable technique for studying this industrial problem. Specifically, Iterated Bagging ensembles with unpruned model trees outperformed the other methods in the tests. This method can predict the geometrical dimensions of the machined microcavities with relative errors related to the main average value in the range of 3 to 23%, which are considered very accurate predictions, in view of the characteristics of this innovative industrial task.},
  keywords   = {Ensemble methods, Laser milling, Neural networks, Support vector machines},
  pubstate   = {published},
  tppubtype  = {article}
}
2011
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; Pardo, Carlos
Random projections for linear SVM ensembles Journal Article
In: Applied Intelligence, vol. 34, pp. 347-359, 2011, ISSN: 0924-669X, 1573-7497, (10.1007/s10489-011-0283-2).
Links | BibTeX | Tags: Data Mining, Ensemble methods, Support vector machines
@article{RandomProjectionsLinearSVMs,
  title      = {Random projections for linear {SVM} ensembles},
  author     = {Maudes, Jesús and Rodríguez, Juan José and García-Osorio, César and Pardo, Carlos},
  journal    = {Applied Intelligence},
  volume     = {34},
  pages      = {347--359},
  publisher  = {Springer Netherlands},
  year       = {2011},
  date       = {2011-01-01},
  doi        = {10.1007/s10489-011-0283-2},
  url        = {https://doi.org/10.1007/s10489-011-0283-2},
  issn       = {0924-669X, 1573-7497},
  keywords   = {Data Mining, Ensemble methods, Support vector machines},
  pubstate   = {published},
  tppubtype  = {article}
}
2010
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; Pardo, Carlos
Random Projections for SVM Ensembles Book Chapter
In: García-Pedrajas, Nicolás; Herrera, Francisco; Fyfe, Colin; Benítez, José Manuel; Ali, Moonis (Ed.): Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010, vol. 6097, pp. 87–95, Springer, Córdoba, Spain, 2010, ISBN: 978-3-642-13024-3.
Links | BibTeX | Tags: Data Mining, Support vector machines
@inbook{MRGP10,
  title      = {Random Projections for {SVM} Ensembles},
  author     = {Maudes, Jesús and Rodríguez, Juan José and García-Osorio, César and Pardo, Carlos},
  editor     = {García-Pedrajas, Nicolás and Herrera, Francisco and Fyfe, Colin and Benítez, José Manuel and Ali, Moonis},
  booktitle  = {Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010},
  series     = {Lecture Notes in Computer Science},
  volume     = {6097},
  pages      = {87--95},
  publisher  = {Springer},
  address    = {Córdoba, Spain},
  year       = {2010},
  date       = {2010-01-01},
  doi        = {10.1007/978-3-642-13025-0_10},
  url        = {https://link.springer.com/chapter/10.1007/978-3-642-13025-0_10},
  isbn       = {978-3-642-13024-3},
  keywords   = {Data Mining, Support vector machines},
  pubstate   = {published},
  tppubtype  = {inbook}
}
2009
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Disturbing Neighbors Ensembles for Linear SVM Proceedings Article
In: Benediktsson, Jon Atli; Kittler, Josef; Roli, Fabio (Ed.): 8th International Workshop on Multiple Classifier Systems, MCS 2009, pp. 191–200, Springer-Verlag, Reykjavik, Iceland, 2009, ISBN: 978-3-642-02325-5.
Links | BibTeX | Tags: Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods, Support vector machines
@inproceedings{MRG09a,
  title      = {Disturbing Neighbors Ensembles for Linear {SVM}},
  author     = {Maudes, Jesús and Rodríguez, Juan José and García-Osorio, César},
  editor     = {Benediktsson, Jon Atli and Kittler, Josef and Roli, Fabio},
  booktitle  = {8th International Workshop on Multiple Classifier Systems, MCS 2009},
  series     = {Lecture Notes in Computer Science},
  volume     = {5519},
  pages      = {191--200},
  publisher  = {Springer-Verlag},
  address    = {Reykjavik, Iceland},
  year       = {2009},
  date       = {2009-01-01},
  doi        = {10.1007/978-3-642-02326-2_20},
  isbn       = {978-3-642-02325-5},
  keywords   = {Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods, Support vector machines},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}