2021
Juez-Gil, Mario; Arnaiz-González, Álvar; Rodríguez, Juan José; López-Nozal, Carlos; García-Osorio, César
Rotation Forest for Big Data Journal Article
In: Information Fusion, vol. 74, pp. 39-49, 2021, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Big data, Ensemble learning, Machine learning, Random forest, Rotation forest, Spark
% Journal article in Information Fusion, vol. 74 (2021).
% The page range is written with the BibTeX double hyphen so styles typeset an en dash.
@article{Juez-Gil2021,
  title     = {Rotation Forest for Big Data},
  author    = {Mario Juez-Gil and Álvar Arnaiz-González and Juan José Rodríguez and Carlos López-Nozal and César García-Osorio},
  url       = {https://www.sciencedirect.com/science/article/pii/S1566253521000634},
  doi       = {10.1016/j.inffus.2021.03.007},
  issn      = {1566-2535},
  year      = {2021},
  date      = {2021-10-01},
  journal   = {Information Fusion},
  volume    = {74},
  pages     = {39--49},
  abstract  = {The Rotation Forest classifier is a successful ensemble method for a wide variety of data mining applications. However, the way in which Rotation Forest transforms the feature space through PCA, although powerful, penalizes training and prediction times, making it unfeasible for Big Data. In this paper, a MapReduce Rotation Forest and its implementation under the Spark framework are presented. The proposed MapReduce Rotation Forest behaves in the same way as the standard Rotation Forest, training the base classifiers on a rotated space, but using a functional implementation of the rotation that enables its execution in Big Data frameworks. Experimental results are obtained using different cloud-based cluster configurations. Bayesian tests are used to validate the method against two ensembles for Big Data: Random Forest and PCARDE classifiers. Our proposal incorporates the parallelization of both the PCA calculation and the tree training, providing a scalable solution that retains the performance of the original Rotation Forest and achieves a competitive execution time (in average, at training, more than 3 times faster than other PCA-based alternatives). In addition, extensive experimentation shows that by setting some parameters of the classifier (i.e., bootstrap sample size, number of trees, and number of rotations), the execution time is reduced with no significant loss of performance using a small ensemble.},
  keywords  = {Big data, Ensemble learning, Machine learning, Random forest, Rotation forest, Spark},
  pubstate  = {published},
  tppubtype = {article}
}
2020
Bustillo, Andrés; Pimenov, Danil Yurievich; Mia, Mozammel; Kapłonek, Wojciech
Machine-learning for automatic prediction of flatness deviation considering the wear of the face mill teeth Journal Article
In: Journal of Intelligent Manufacturing, 2020, ISSN: 0956-5515.
Abstract | Links | BibTeX | Tags: Cutting power, Face milling, Flatness deviation, Random forest, SMOTE, tool condition monitoring, Tool life, Wear
% Journal article in Journal of Intelligent Manufacturing (2020); no volume or pages are recorded.
% The doi field holds the bare DOI; the resolver prefix is supplied by the style, not the data.
@article{Bustillo2020b,
  title     = {Machine-learning for automatic prediction of flatness deviation considering the wear of the face mill teeth},
  author    = {Andrés Bustillo and Danil Yurievich Pimenov and Mozammel Mia and Wojciech Kapłonek},
  url       = {https://link.springer.com/article/10.1007/s10845-020-01645-3},
  doi       = {10.1007/s10845-020-01645-3},
  issn      = {0956-5515},
  year      = {2020},
  date      = {2020-09-03},
  journal   = {Journal of Intelligent Manufacturing},
  abstract  = {The acceptance of the machined surfaces not only depends on roughness parameters but also in the flatness deviation (Δfl). Hence, before reaching the threshold of flatness deviation caused by the wear of the face mill, the tool inserts need to be changed to avoid the expected product rejection. As current CNC machines have the facility to track, in real-time, the main drive power, the present study utilizes this facility to predict the flatness deviation—with proper consideration to the amount of wear of cutting tool insert’s edge. The prediction of deviation from flatness is evaluated as a regression and a classification problem, while different machine-learning techniques like Multilayer Perceptrons, Radial Basis Functions Networks, Decision Trees and Random Forest ensembles have been examined. Finally, Random Forest ensembles combined with Synthetic Minority Over-sampling Technique (SMOTE) balancing technique showed the highest performance when the flatness levels are discretized taking into account industrial requirements. The SMOTE balancing technique resulted in a very useful strategy to avoid the strong limitations that small experiment datasets produce in the accuracy of machine-learning models.},
  keywords  = {Cutting power, Face milling, Flatness deviation, Random forest, SMOTE, tool condition monitoring, Tool life, Wear},
  pubstate  = {published},
  tppubtype = {article}
}
Rodríguez, Juan José; Juez-Gil, Mario; Arnaiz-González, Álvar; Kuncheva, Ludmila I
An experimental evaluation of mixup regression forests Journal Article
In: Expert Systems with Applications, vol. 151, no. 113376, 2020, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: Mixup, Random forest, Regression, Rotation forest
% Journal article in Expert Systems with Applications, vol. 151 (2020).
% 113376 is the article number (it matches the DOI suffix), so it is stored in pages, not as an issue number.
% NOTE(review): the citation key contains a non-ASCII character; it is kept unchanged because
% existing documents may cite it, but classic BibTeX toolchains may not accept it.
@article{Rodríguez2020,
  title     = {An experimental evaluation of mixup regression forests},
  author    = {Juan José Rodríguez and Mario Juez-Gil and Álvar Arnaiz-González and Ludmila I Kuncheva},
  url       = {https://www.sciencedirect.com/science/article/abs/pii/S0957417420302013},
  doi       = {10.1016/j.eswa.2020.113376},
  issn      = {0957-4174},
  year      = {2020},
  date      = {2020-08-01},
  journal   = {Expert Systems with Applications},
  volume    = {151},
  pages     = {113376},
  abstract  = {Over the past few decades, the remarkable prediction capabilities of ensemble methods have been used within a wide range of applications. Maximization of base-model ensemble accuracy and diversity are the keys to the heightened performance of these methods. One way to achieve diversity for training the base models is to generate artificial/synthetic instances for their incorporation with the original instances. Recently, the mixup method was proposed for improving the classification power of deep neural networks (Zhang, Cissé, Dauphin, and Lopez-Paz, 2017). Mixup method generates artificial instances by combining pairs of instances and their labels, these new instances are used for training the neural networks promoting its regularization. In this paper, new regression tree ensembles trained with mixup, which we will refer to as Mixup Regression Forest, are presented and tested. The experimental study with 61 datasets showed that the mixup approach improved the results of both Random Forest and Rotation Forest.},
  keywords  = {Mixup, Random forest, Regression, Rotation forest},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Oleaga, Ibone; Pardo, Carlos; Zulaika, Juan J; Bustillo, Andres
A machine-learning based solution for chatter prediction in heavy-duty milling machines Journal Article
In: Measurement, vol. 128, pp. 34–44, 2018, ISSN: 0263-2241.
Abstract | Links | BibTeX | Tags: Chatter, Milling, Polar diagrams, Random forest, Regression trees, Vibrations
% Journal article in Measurement, vol. 128 (2018).
% The doi field holds the bare DOI and the page range uses the BibTeX double hyphen.
@article{OLEAGA201834,
  title     = {A machine-learning based solution for chatter prediction in heavy-duty milling machines},
  author    = {Ibone Oleaga and Carlos Pardo and Juan J Zulaika and Andres Bustillo},
  url       = {http://www.sciencedirect.com/science/article/pii/S0263224118305542},
  doi       = {10.1016/j.measurement.2018.06.028},
  issn      = {0263-2241},
  year      = {2018},
  date      = {2018-11-01},
  journal   = {Measurement},
  volume    = {128},
  pages     = {34--44},
  abstract  = {The main productivity constraints of milling operations are self-induced vibrations, especially regenerative chatter vibrations. Two key parameters are linked to these vibrations: the depth of cut achievable without vibrations and the chatter frequency. Both parameters are linked to the dynamics of machine component excitation and the milling operation parameters. Their identification in any cutting direction in milling machine operations requires complex analytical models and mechatronic simulations, usually only applied to identify the worst cutting conditions in operating machines. This work proposes the use of machine learning techniques with no need to calculate the two above-mentioned parameters by means of a 3-step strategy. The strategy combines: 1) experimental frequency responses collected at the tool center point; 2) analytical calculations of both parameters; and, 3) different machine learning techniques. The results of these calculations can then be used to predict chatter under different combinations of milling directions and machine positions. This strategy is validated with real experiments on a bridge milling machine performing concordance roughing operations on AISI 1045 steel with a 125 mm diameter mill fitted with nine cutters at 45°, the results of which have confirmed the high variability of both parameters along the working volume. The following regression techniques are tested: artificial neural networks, regression trees and Random Forest. The results show that Random Forest ensembles provided the highest accuracy with a statistical advantage over the other machine learning models; they achieved a final accuracy of 0.95 mm for the critical depth and 7.3 Hz for the chatter frequency (RMSE) in the whole working volume and in all feed directions, applying a 10 × 10 cross validation scheme.
These RMSE values are acceptable from the industrial point of view, taking into account that the critical depth of this range varies between 0.68 mm and 19.20 mm and the chatter frequency between 1.14 Hz and 65.25 Hz. Besides, Random Forest ensembles are more easily optimized than artificial neural networks (1 parameter configuration versus 210 MLPs). Additionally, tools that incorporate regression trees are interesting and highly accurate, providing immediately accessible and useful information in visual formats on critical machine performance for the design engineer.},
  keywords  = {Chatter, Milling, Polar diagrams, Random forest, Regression trees, Vibrations},
  pubstate  = {published},
  tppubtype = {article}
}
Pimenov, Yu. D; Bustillo, A; Mikolajczyk, T
Artificial intelligence for automatic prediction of required surface roughness by monitoring wear on face mill teeth Journal Article
In: Journal of Intelligent Manufacturing, vol. 29, no. 5, pp. 1045–1061, 2018, ISSN: 1572-8145.
Abstract | Links | BibTeX | Tags: Cutting power, Face milling, Wear, Processing time, Random forest, surface roughness
% Journal article in Journal of Intelligent Manufacturing, vol. 29, no. 5 (2018).
% Author names for Pimenov and Bustillo follow the full spellings used by the other entries in this file.
% NOTE(review): the third author's given name is not available in this file; the initial is kept.
@article{Pimenov2018,
  title     = {Artificial intelligence for automatic prediction of required surface roughness by monitoring wear on face mill teeth},
  author    = {Danil Yurievich Pimenov and Andrés Bustillo and T Mikolajczyk},
  url       = {https://doi.org/10.1007/s10845-017-1381-8},
  doi       = {10.1007/s10845-017-1381-8},
  issn      = {1572-8145},
  year      = {2018},
  date      = {2018-06-01},
  journal   = {Journal of Intelligent Manufacturing},
  volume    = {29},
  number    = {5},
  pages     = {1045--1061},
  abstract  = {Nowadays, face milling is one of the most widely used machining processes for the generation of flat surfaces. Following international standards, the quality of a machined surface is measured in terms of surface roughness, Ra, a parameter that will decrease with increased tool wear. So, cutting inserts of the milling tool have to be changed before a given surface quality threshold is exceeded. The use of artificial intelligence methods is suggested in this paper for real-time prediction of surface roughness deviations, depending on the main drive power, and taking tool wear, $V_B$ into account. This method ensures comprehensive use of the potential of modern CNC machines that are able to monitor the main drive power, N, in real-time. It can likewise estimate the three parameters -maximum tool wear, machining time, and cutting power- that are required to generate a given surface roughness, thereby making the most efficient use of the cutting tool. A series of artificial intelligence methods are tested: random forest (RF), standard Multilayer perceptrons (MLP), Regression Trees, and radial-based functions. Random forest was shown to have the highest model accuracy, followed by regression trees, displaying higher accuracy than the standard MLP and the radial-basis function. Moreover, RF techniques are easily tuned and generate visual information for direct use by the process engineer, such as the linear relationships between process parameters and roughness, and thresholds for avoiding rapid tool wear. All of this information can be directly extracted from the tree structure or by drawing 3D charts plotting two process inputs and the predicted roughness depending on workshop requirements.},
  keywords  = {Cutting power, Face milling, Wear, Processing time, Random forest, surface roughness},
  pubstate  = {published},
  tppubtype = {article}
}
2017
Maudes, Jesus; Bustillo, Andrés; Guerra, Antonio J; Ciurana, Joaquim
Random Forest ensemble prediction of stent dimensions in microfabrication processes Journal Article
In: The International Journal of Advanced Manufacturing Technology, vol. 91, no. 1, pp. 879–893, 2017, ISSN: 1433-3015.
Abstract | Links | BibTeX | Tags: Data Mining, Ensembles of regressors, Random forest, Regression trees, Stents Laser machining
% Journal article in The International Journal of Advanced Manufacturing Technology, vol. 91, no. 1 (2017).
% Fields are grouped as: who/what, where published, when, identifiers, then descriptive metadata.
@article{Maudes2017,
  author    = {Jesus Maudes and Andrés Bustillo and Antonio J Guerra and Joaquim Ciurana},
  title     = {Random Forest ensemble prediction of stent dimensions in microfabrication processes},
  journal   = {The International Journal of Advanced Manufacturing Technology},
  volume    = {91},
  number    = {1},
  pages     = {879--893},
  year      = {2017},
  date      = {2017-07-01},
  issn      = {1433-3015},
  doi       = {10.1007/s00170-016-9695-9},
  url       = {https://doi.org/10.1007/s00170-016-9695-9},
  keywords  = {Data Mining, Ensembles of regressors, Random forest, Regression trees, Stents Laser machining},
  abstract  = {The recent development of new laser machine tools for the manufacture of micro-scale metallic components has boosted demand in the field of medical applications. However, the optimization of this process encounters a major problem: a knowledge gap concerning the relation between the controllable parameters of these machine tools and the quality of the machined components. Our research proposes a two-step strategy to approach this problem for the manufacture of stents. First, a screening test identifies good and bad performance conditions for the laser process and generates useful information on cutting performance; then, a stent is manufactured under different cutting conditions and the most accurate machine learning technique to model this process is identified. This strategy is validated with the performance of experiments that vary pulse duration, laser power, and cutting speed, and measure two geometrical characteristics of the stent geometry. The results showed that linear Support Vector Machines can identify good and bad cutting conditions, while Random Forest ensembles of regression trees can predict with high accuracy the two characteristics of the stent geometry under study. Besides, this technique can extract useful information from the screening test that improves its final accuracy. In view of the small dataset size, an alternative based on the leave-one-out technique was used, instead of standard cross validation, so as to assure the generalization capability of the models.},
  pubstate  = {published},
  tppubtype = {article}
}
2014
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Tree ensemble construction using a GRASP-based heuristic and annealed randomness Journal Article
In: Information Fusion, vol. 20, no. 0, pp. 189–202, 2014, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metaheuristic, Random forest
% Journal article in Information Fusion, vol. 20 (2014).
% No issue number is recorded: the exported value 0 was a placeholder, not a real issue.
% The acronym GRASP is brace-protected so sentence-casing styles keep it upper-case.
@article{DiezPastor2014,
  title     = {Tree ensemble construction using a {GRASP}-based heuristic and annealed randomness},
  author    = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
  url       = {http://www.sciencedirect.com/science/article/pii/S1566253514000141},
  doi       = {10.1016/j.inffus.2014.01.009},
  issn      = {1566-2535},
  year      = {2014},
  date      = {2014-01-01},
  journal   = {Information Fusion},
  volume    = {20},
  pages     = {189--202},
  abstract  = {Two new methods for tree ensemble construction are presented: G-Forest and GAR-Forest. In a similar way to Random Forest, the tree construction process entails a degree of randomness. The same strategy used in the GRASP metaheuristic for generating random and adaptive solutions is used at each node of the trees. The source of diversity of the ensemble is the randomness of the solution generation method of GRASP. A further key feature of the tree construction method for GAR-Forest is a decreasing level of randomness during the process of constructing the tree: maximum randomness at the root and minimum randomness at the leaves. The method is therefore named ``GAR'', GRASP with annealed randomness. The results conclusively demonstrate that G-Forest and GAR-Forest outperform Bagging, AdaBoost, MultiBoost, Random Forest and Random Subspaces. The results are even more convincing in the presence of noise, demonstrating the robustness of the method. The relationship between base classifier accuracy and their diversity is analysed by application of kappa-error diagrams and a variant of these called kappa-error relative movement diagrams.},
  keywords  = {Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metaheuristic, Random forest},
  pubstate  = {published},
  tppubtype = {article}
}
2012
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; García-Pedrajas, Nicolás
Random Feature Weights for Decision Tree Ensemble Construction Journal Article
In: Information Fusion, vol. 13, no. 1, pp. 20-30, 2012, ISSN: 1566-2535.
Links | BibTeX | Tags: Bagging, Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, Random forest
% Journal article in Information Fusion, vol. 13, no. 1 (2012); no abstract or URL is recorded.
% The page range is written with the BibTeX double hyphen so styles typeset an en dash.
@article{RFW2012,
  title     = {Random Feature Weights for Decision Tree Ensemble Construction},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio and Nicolás García-Pedrajas},
  doi       = {10.1016/j.inffus.2010.11.004},
  issn      = {1566-2535},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {Information Fusion},
  volume    = {13},
  number    = {1},
  pages     = {20--30},
  keywords  = {Bagging, Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, Random forest},
  pubstate  = {published},
  tppubtype = {article}
}