2020 |
Rodríguez, Juan José; Juez-Gil, Mario; Arnaiz-González, Álvar; Kuncheva, Ludmila I An experimental evaluation of mixup regression forests Journal Article In: Expert Systems with Applications, 151 (113376), 2020, ISSN: 0957-4174. Abstract | Links | BibTeX | Tags: Mixup, Random forest, Regression, Rotation forest
% Journal article: mixup-trained regression tree ensembles (Expert Systems with Applications, vol. 151).
% 113376 is the article number (it matches the DOI suffix), so it is stored in
% `pages` rather than as an issue `number`.
% The url is the plain landing page; the original tracking query string was dropped.
% `date`, `pubstate` and `tppubtype` are kept as exported (presumably consumed by the publishing tool -- confirm).
@article{Rodríguez2020,
  author    = {Rodríguez, Juan José and Juez-Gil, Mario and Arnaiz-González, Álvar and Kuncheva, Ludmila I.},
  title     = {An experimental evaluation of mixup regression forests},
  journal   = {Expert Systems with Applications},
  volume    = {151},
  pages     = {113376},
  year      = {2020},
  date      = {2020-08-01},
  issn      = {0957-4174},
  doi       = {10.1016/j.eswa.2020.113376},
  url       = {https://www.sciencedirect.com/science/article/abs/pii/S0957417420302013},
  abstract  = {Over the past few decades, the remarkable prediction capabilities of ensemble methods have been used within a wide range of applications. Maximization of base-model ensemble accuracy and diversity are the keys to the heightened performance of these methods. One way to achieve diversity for training the base models is to generate artificial/synthetic instances for their incorporation with the original instances. Recently, the mixup method was proposed for improving the classification power of deep neural networks (Zhang, Cissé, Dauphin, and Lopez-Paz, 2017). Mixup method generates artificial instances by combining pairs of instances and their labels, these new instances are used for training the neural networks promoting its regularization. In this paper, new regression tree ensembles trained with mixup, which we will refer to as Mixup Regression Forest, are presented and tested. The experimental study with 61 datasets showed that the mixup approach improved the results of both Random Forest and Rotation Forest.},
  keywords  = {Mixup, Random forest, Regression, Rotation forest},
  pubstate  = {published},
  tppubtype = {article}
}
Over the past few decades, the remarkable prediction capabilities of ensemble methods have been used within a wide range of applications. Maximization of base-model ensemble accuracy and diversity are the keys to the heightened performance of these methods. One way to achieve diversity for training the base models is to generate artificial/synthetic instances for their incorporation with the original instances. Recently, the mixup method was proposed for improving the classification power of deep neural networks (Zhang, Cissé, Dauphin, and Lopez-Paz, 2017). Mixup method generates artificial instances by combining pairs of instances and their labels, these new instances are used for training the neural networks promoting its regularization. In this paper, new regression tree ensembles trained with mixup, which we will refer to as Mixup Regression Forest, are presented and tested. The experimental study with 61 datasets showed that the mixup approach improved the results of both Random Forest and Rotation Forest. |
2016 |
Bustillo, Andres; de Lacalle, Luis López N; Fernández-Valdivielso, Asier; Santos, Pedro Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components Journal Article In: Journal of Computational Design and Engineering, 3 (4), pp. 337 - 348, 2016, ISSN: 2288-4300. Abstract | Links | BibTeX | Tags: Ensembles, Forming taps, Regression trees, Roll taps, Roll-tap wear, Rotation forest, Threading
% Journal article: data-mining models for predicting forming-tap wear (JCDE 3(4), 2016).
% `doi` holds the bare identifier (no https://doi.org/ resolver prefix); styles add the resolver.
% The compound surname is braced so BibTeX does not split it into von="de" / last="Lacalle"
% (NOTE(review): surname taken to be "López de Lacalle" -- confirm against the published paper).
% Percent signs in the abstract are escaped so the field is safe if it is ever typeset.
% `date`, `pubstate` and `tppubtype` are kept as exported (presumably consumed by the publishing tool -- confirm).
@article{BUSTILLO2016337,
  author    = {Bustillo, Andres and {López de Lacalle}, Luis N. and Fernández-Valdivielso, Asier and Santos, Pedro},
  title     = {Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components},
  journal   = {Journal of Computational Design and Engineering},
  volume    = {3},
  number    = {4},
  pages     = {337--348},
  year      = {2016},
  date      = {2016-10-01},
  issn      = {2288-4300},
  doi       = {10.1016/j.jcde.2016.06.002},
  url       = {http://www.sciencedirect.com/science/article/pii/S2288430016300306},
  abstract  = {An experimental approach is presented for the measurement of wear that is common in the threading of cold-forged steel. In this work, the first objective is to measure wear on various types of roll taps manufactured to tapping holes in microalloyed HR45 steel. Different geometries and levels of wear are tested and measured. Taking their geometry as the critical factor, the types of forming tap with the least wear and the best performance are identified. Abrasive wear was observed on the forming lobes. A higher number of lobes in the chamber zone and around the nominal diameter meant a more uniform load distribution and a more gradual forming process. A second objective is to identify the most accurate data-mining technique for the prediction of form-tap wear. Different data-mining techniques are tested to select the most accurate one: from standard versions such as Multilayer Perceptrons, Support Vector Machines and Regression Trees to the most recent ones such as Rotation Forest ensembles and Iterated Bagging ensembles. The best results were obtained with ensembles of Rotation Forest with unpruned Regression Trees as base regressors that reduced the RMS error of the best-tested baseline technique for the lower length output by 33\%, and Additive Regression with unpruned M5P as base regressors that reduced the RMS errors of the linear fit for the upper and total lengths by 25\% and 39\%, respectively. However, the lower length was statistically more difficult to model in Additive Regression than in Rotation Forest. Rotation Forest with unpruned Regression Trees as base regressors therefore appeared to be the most suitable regressor for the modeling of this industrial problem.},
  keywords  = {Ensembles, Forming taps, Regression trees, Roll taps, Roll-tap wear, Rotation forest, Threading},
  pubstate  = {published},
  tppubtype = {article}
}
An experimental approach is presented for the measurement of wear that is common in the threading of cold-forged steel. In this work, the first objective is to measure wear on various types of roll taps manufactured to tapping holes in microalloyed HR45 steel. Different geometries and levels of wear are tested and measured. Taking their geometry as the critical factor, the types of forming tap with the least wear and the best performance are identified. Abrasive wear was observed on the forming lobes. A higher number of lobes in the chamber zone and around the nominal diameter meant a more uniform load distribution and a more gradual forming process. A second objective is to identify the most accurate data-mining technique for the prediction of form-tap wear. 
Different data-mining techniques are tested to select the most accurate one: from standard versions such as Multilayer Perceptrons, Support Vector Machines and Regression Trees to the most recent ones such as Rotation Forest ensembles and Iterated Bagging ensembles. The best results were obtained with ensembles of Rotation Forest with unpruned Regression Trees as base regressors that reduced the RMS error of the best-tested baseline technique for the lower length output by 33%, and Additive Regression with unpruned M5P as base regressors that reduced the RMS errors of the linear fit for the upper and total lengths by 25% and 39%, respectively. However, the lower length was statistically more difficult to model in Additive Regression than in Rotation Forest. Rotation Forest with unpruned Regression Trees as base regressors therefore appeared to be the most suitable regressor for the modeling of this industrial problem. |
2015 |
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article In: Information Sciences, 325 , pp. 98 - 117, 2015, ISSN: 0020-0255. Abstract | Links | BibTeX | Tags: Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling
% Journal article: diversity-increasing techniques for imbalance-learning ensembles (Information Sciences 325, 2015).
% Page range uses the BibTeX en-dash form (`--`).
% The stray leading word "Abstract" (a scraped heading) was removed from the abstract field.
% `date`, `pubstate` and `tppubtype` are kept as exported (presumably consumed by the publishing tool -- confirm).
@article{DiezPastor201598,
  author    = {Díez-Pastor, José Francisco and Rodríguez, Juan José and García-Osorio, César and Kuncheva, Ludmila I.},
  title     = {Diversity techniques improve the performance of the best imbalance learning ensembles},
  journal   = {Information Sciences},
  volume    = {325},
  pages     = {98--117},
  year      = {2015},
  date      = {2015-01-01},
  issn      = {0020-0255},
  doi       = {10.1016/j.ins.2015.07.025},
  url       = {http://www.sciencedirect.com/science/article/pii/S0020025515005186},
  abstract  = {Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques.},
  keywords  = {Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling},
  pubstate  = {published},
  tppubtype = {article}
}
Abstract Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. 
This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques. |
2013 |
Pardo, Carlos; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José Rotation Forests for regression Journal Article In: Applied Mathematics and Computation, 219 (19), pp. 9914-9924, 2013, ISSN: 0096-3003. Links | BibTeX | Tags: Data Mining, Regression, Rotation forest
% Journal article: Rotation Forests applied to regression (Applied Mathematics and Computation 219(19), 2013).
% Page range uses the BibTeX en-dash form (`--`); authors are in unambiguous "Last, First" form.
% `date`, `pubstate` and `tppubtype` are kept as exported (presumably consumed by the publishing tool -- confirm).
@article{amcPardoDGR13,
  author    = {Pardo, Carlos and Díez-Pastor, José Francisco and García-Osorio, César and Rodríguez, Juan José},
  title     = {Rotation Forests for regression},
  journal   = {Applied Mathematics and Computation},
  volume    = {219},
  number    = {19},
  pages     = {9914--9924},
  year      = {2013},
  date      = {2013-01-01},
  issn      = {0096-3003},
  doi       = {10.1016/j.amc.2013.03.139},
  keywords  = {Data Mining, Regression, Rotation forest},
  pubstate  = {published},
  tppubtype = {article}
}
|
Publications
2020 |
An experimental evaluation of mixup regression forests Journal Article In: Expert Systems with Applications, 151 (113376), 2020, ISSN: 0957-4174. |
2016 |
Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components Journal Article In: Journal of Computational Design and Engineering, 3 (4), pp. 337-348, 2016, ISSN: 2288-4300. |
2015 |
Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article In: Information Sciences, 325, pp. 98-117, 2015, ISSN: 0020-0255. |
2013 |
Rotation Forests for regression Journal Article In: Applied Mathematics and Computation, 219 (19), pp. 9914-9924, 2013, ISSN: 0096-3003. |