2021
Juez-Gil, Mario; Arnaiz-González, Álvar; Rodríguez, Juan José; López-Nozal, Carlos; García-Osorio, César
Rotation Forest for Big Data Journal Article
In: Information Fusion, vol. 74, pp. 39-49, 2021, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Big data, Ensemble learning, Machine learning, Random forest, Rotation forest, Spark
@article{Juez-Gil2021,
  title      = {Rotation Forest for Big Data},
  author     = {Mario Juez-Gil and Álvar Arnaiz-González and Juan José Rodríguez and Carlos López-Nozal and César García-Osorio},
  url        = {https://www.sciencedirect.com/science/article/pii/S1566253521000634},
  doi        = {10.1016/j.inffus.2021.03.007},
  issn       = {1566-2535},
  year       = {2021},
  date       = {2021-10-01},
  journal    = {Information Fusion},
  volume     = {74},
  pages      = {39--49},
  abstract   = {The Rotation Forest classifier is a successful ensemble method for a wide variety of data mining applications. However, the way in which Rotation Forest transforms the feature space through PCA, although powerful, penalizes training and prediction times, making it unfeasible for Big Data. In this paper, a MapReduce Rotation Forest and its implementation under the Spark framework are presented. The proposed MapReduce Rotation Forest behaves in the same way as the standard Rotation Forest, training the base classifiers on a rotated space, but using a functional implementation of the rotation that enables its execution in Big Data frameworks. Experimental results are obtained using different cloud-based cluster configurations. Bayesian tests are used to validate the method against two ensembles for Big Data: Random Forest and PCARDE classifiers. Our proposal incorporates the parallelization of both the PCA calculation and the tree training, providing a scalable solution that retains the performance of the original Rotation Forest and achieves a competitive execution time (in average, at training, more than 3 times faster than other PCA-based alternatives). In addition, extensive experimentation shows that by setting some parameters of the classifier (i.e., bootstrap sample size, number of trees, and number of rotations), the execution time is reduced with no significant loss of performance using a small ensemble.},
  keywords   = {Big data, Ensemble learning, Machine learning, Random forest, Rotation forest, Spark},
  pubstate   = {published},
  tppubtype  = {article}
}
Rodríguez, Juan José; Juez-Gil, Mario; López-Nozal, Carlos; Arnaiz-González, Álvar
Rotation Forest for multi-target regression Journal Article
In: International Journal of Machine Learning and Cybernetics, 2021, ISSN: 1868-808X.
Abstract | Links | BibTeX | Tags: ensemble, multi-target regression, Rotation forest
@article{Rodríguez2021,
  title      = {Rotation Forest for multi-target regression},
  author     = {Juan José Rodríguez and Mario Juez-Gil and Carlos López-Nozal and Álvar Arnaiz-González},
  url        = {https://link.springer.com/article/10.1007/s13042-021-01329-1},
  doi        = {10.1007/s13042-021-01329-1},
  issn       = {1868-808X},
  year       = {2021},
  date       = {2021-04-22},
  journal    = {International Journal of Machine Learning and Cybernetics},
  abstract   = {The prediction of multiple numeric outputs at the same time is called multi-target regression (MTR), and it has gained attention during the last decades. This task is a challenging research topic in supervised learning because it poses additional difficulties to traditional single-target regression (STR), and many real-world problems involve the prediction of multiple targets at once. One of the most successful approaches to deal with MTR, although not the only one, consists in transforming the problem in several STR problems, whose outputs will be combined building up the MTR output. In this paper, the Rotation Forest ensemble method, previously proposed for single-label classification and single-target regression, is adapted to MTR tasks and tested with several regressors and data sets. Our proposal rotates the input space in an efficient and novel fashion, avoiding extra rotations forced by MTR problem decomposition. Four approaches for MTR are used: single-target (ST), stacked-single target (SST), Ensembles of Regressor Chains (ERC), and Multi-target Regression via Quantization (MRQ). For assessing the benefits of the proposal, a thorough experimentation with 28 MTR data sets and statistical tests are used, concluding that Rotation Forest, adapted by means of these approaches, outperforms other popular ensembles, such as Bagging and Random Forest.},
  keywords   = {ensemble, multi-target regression, Rotation forest},
  pubstate   = {published},
  tppubtype  = {article}
}
2020
Rodríguez, Juan José; Juez-Gil, Mario; Arnaiz-González, Álvar; Kuncheva, Ludmila I
An experimental evaluation of mixup regression forests Journal Article
In: Expert Systems with Applications, vol. 151, no. 113376, 2020, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: Mixup, Random forest, Regression, Rotation forest
@article{Rodríguez2020,
  title      = {An experimental evaluation of mixup regression forests},
  author     = {Juan José Rodríguez and Mario Juez-Gil and Álvar Arnaiz-González and Ludmila I Kuncheva},
  url        = {https://www.sciencedirect.com/science/article/abs/pii/S0957417420302013},
  doi        = {10.1016/j.eswa.2020.113376},
  issn       = {0957-4174},
  year       = {2020},
  date       = {2020-08-01},
  journal    = {Expert Systems with Applications},
  volume     = {151},
  pages      = {113376},
  abstract   = {Over the past few decades, the remarkable prediction capabilities of ensemble methods have been used within a wide range of applications. Maximization of base-model ensemble accuracy and diversity are the keys to the heightened performance of these methods. One way to achieve diversity for training the base models is to generate artificial/synthetic instances for their incorporation with the original instances. Recently, the mixup method was proposed for improving the classification power of deep neural networks (Zhang, Cissé, Dauphin, and Lopez-Paz, 2017). Mixup method generates artificial instances by combining pairs of instances and their labels, these new instances are used for training the neural networks promoting its regularization. In this paper, new regression tree ensembles trained with mixup, which we will refer to as Mixup Regression Forest, are presented and tested. The experimental study with 61 datasets showed that the mixup approach improved the results of both Random Forest and Rotation Forest.},
  keywords   = {Mixup, Random forest, Regression, Rotation forest},
  pubstate   = {published},
  tppubtype  = {article}
}
2016
Bustillo, Andres; López de Lacalle, Luis N.; Fernández-Valdivielso, Asier; Santos, Pedro
Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components Journal Article
In: Journal of Computational Design and Engineering, vol. 3, no. 4, pp. 337 - 348, 2016, ISSN: 2288-4300.
Abstract | Links | BibTeX | Tags: Ensembles, Forming taps, Regression trees, Roll taps, Roll-tap wear, Rotation forest, Threading
@article{BUSTILLO2016337,
  title      = {Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components},
  author     = {Bustillo, Andres and López de Lacalle, Luis N. and Fernández-Valdivielso, Asier and Santos, Pedro},
  url        = {http://www.sciencedirect.com/science/article/pii/S2288430016300306},
  doi        = {10.1016/j.jcde.2016.06.002},
  issn       = {2288-4300},
  year       = {2016},
  date       = {2016-10-01},
  journal    = {Journal of Computational Design and Engineering},
  volume     = {3},
  number     = {4},
  pages      = {337--348},
  abstract   = {An experimental approach is presented for the measurement of wear that is common in the threading of cold-forged steel. In this work, the first objective is to measure wear on various types of roll taps manufactured to tapping holes in microalloyed HR45 steel. Different geometries and levels of wear are tested and measured. Taking their geometry as the critical factor, the types of forming tap with the least wear and the best performance are identified. Abrasive wear was observed on the forming lobes. A higher number of lobes in the chamber zone and around the nominal diameter meant a more uniform load distribution and a more gradual forming process. A second objective is to identify the most accurate data-mining technique for the prediction of form-tap wear. Different data-mining techniques are tested to select the most accurate one: from standard versions such as Multilayer Perceptrons, Support Vector Machines and Regression Trees to the most recent ones such as Rotation Forest ensembles and Iterated Bagging ensembles. The best results were obtained with ensembles of Rotation Forest with unpruned Regression Trees as base regressors that reduced the RMS error of the best-tested baseline technique for the lower length output by 33%, and Additive Regression with unpruned M5P as base regressors that reduced the RMS errors of the linear fit for the upper and total lengths by 25% and 39%, respectively. However, the lower length was statistically more difficult to model in Additive Regression than in Rotation Forest. Rotation Forest with unpruned Regression Trees as base regressors therefore appeared to be the most suitable regressor for the modeling of this industrial problem.},
  keywords   = {Ensembles, Forming taps, Regression trees, Roll taps, Roll-tap wear, Rotation forest, Threading},
  pubstate   = {published},
  tppubtype  = {article}
}
2015
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article
In: Information Sciences, vol. 325, pp. 98 - 117, 2015, ISSN: 0020-0255.
Abstract | Links | BibTeX | Tags: Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling
@article{DiezPastor201598,
  title      = {Diversity techniques improve the performance of the best imbalance learning ensembles},
  author     = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
  url        = {http://www.sciencedirect.com/science/article/pii/S0020025515005186},
  doi        = {10.1016/j.ins.2015.07.025},
  issn       = {0020-0255},
  year       = {2015},
  date       = {2015-01-01},
  journal    = {Information Sciences},
  volume     = {325},
  pages      = {98--117},
  abstract   = {Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques.},
  keywords   = {Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling},
  pubstate   = {published},
  tppubtype  = {article}
}
2013
Pardo, Carlos; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Rotation Forests for regression Journal Article
In: Applied Mathematics and Computation, vol. 219, no. 19, pp. 9914-9924, 2013, ISSN: 0096-3003.
Links | BibTeX | Tags: Data Mining, Regression, Rotation forest
@article{amcPardoDGR13,
  title      = {Rotation Forests for regression},
  author     = {Carlos Pardo and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
  doi        = {10.1016/j.amc.2013.03.139},
  issn       = {0096-3003},
  year       = {2013},
  date       = {2013-01-01},
  journal    = {Applied Mathematics and Computation},
  volume     = {219},
  number     = {19},
  pages      = {9914--9924},
  keywords   = {Data Mining, Regression, Rotation forest},
  pubstate   = {published},
  tppubtype  = {article}
}