2021
Díez-Pastor, José Francisco; Val, Alain Gil; Veiga, Fernando; Bustillo, Andrés
High-accuracy classification of thread quality in tapping processes with ensembles of classifiers for imbalanced learning Journal Article
In: Measurement, vol. 168, no. 108328, 2021, ISSN: 0263-2241.
Abstract | Links | BibTeX | Tags: Bagging, Cutting taps, Imbalanced datasets, Quality assessment, SELECTED, Threading
@article{Díez-Pastor2021,
  title     = {High-accuracy classification of thread quality in tapping processes with ensembles of classifiers for imbalanced learning},
  author    = {Díez-Pastor, José Francisco and Val, Alain Gil and Veiga, Fernando and Bustillo, Andrés},
  url       = {https://www.sciencedirect.com/science/article/pii/S0263224120308654},
  doi       = {10.1016/j.measurement.2020.108328},
  issn      = {0263-2241},
  year      = {2021},
  date      = {2021-01-15},
  journal   = {Measurement},
  volume    = {168},
  pages     = {108328},
  abstract  = {Industrial threading processes that use cutting taps are in high demand. However, industrial conditions differ markedly from laboratory conditions. In this study, a machine-learning solution is presented for the correct classification of threads, based on industrial requirements, to avoid expensive manual measurement of quality indicators. First, quality states are categorized. Second, process inputs are extracted from the torque signals including statistical parameters. Third, different machine-learning algorithms are tested: from base classifiers, such as decision trees and multilayer perceptrons, to complex ensembles of classifiers especially designed for imbalanced datasets, such as boosting and bagging decision-tree ensembles combined with SMOTE and under-sampling balancing techniques. Ensembles demonstrated the lowest sensitivity to window sizes, the highest accuracy for smaller window sizes, and the greatest learning ability with small datasets. Fourth, the combination of models with both high Recall and high Precision resulted in a reliable industrial tool, tested on an extensive experimental dataset.},
  keywords  = {Bagging, Cutting taps, Imbalanced datasets, Quality assessment, SELECTED, Threading},
  pubstate  = {published},
  tppubtype = {article}
}
2020
Díez-Pastor, José Francisco; Latorre-Carmona, Pedro; Arnaiz-González, Álvar; Ruiz-Pérez, Javier; Zurro, Débora
“You Are Not My Type”: An Evaluation of Classification Methods for Automatic Phytolith Identification Journal Article
In: Microscopy and Microanalysis, vol. 26, pp. 1158-1167, 2020, ISSN: 1431-9276.
Abstract | Links | BibTeX | Tags: Feature extraction, Machine learning, Microfossils, Morphometry, Proxy
@article{Díez-Pastor2020,
  title     = {“You Are Not My Type”: An Evaluation of Classification Methods for Automatic Phytolith Identification},
  author    = {Díez-Pastor, José Francisco and Latorre-Carmona, Pedro and Arnaiz-González, Álvar and Ruiz-Pérez, Javier and Zurro, Débora},
  url       = {https://www.cambridge.org/core/journals/microscopy-and-microanalysis/article/you-are-not-my-type-an-evaluation-of-classification-methods-for-automatic-phytolith-identification/48F88E9407086B797BBE383B8BC15904},
  doi       = {10.1017/S1431927620024629},
  issn      = {1431-9276},
  year      = {2020},
  date      = {2020-11-10},
  journal   = {Microscopy and Microanalysis},
  volume    = {26},
  pages     = {1158--1167},
  abstract  = {Phytoliths can be an important source of information related to environmental and climatic change, as well as to ancient plant use by humans, particularly within the disciplines of paleoecology and archaeology. Currently, phytolith identification and categorization is performed manually by researchers, a time-consuming task liable to misclassifications. The automated classification of phytoliths would allow the standardization of identification processes, avoiding possible biases related to the classification capability of researchers. This paper presents a comparative analysis of six classification methods, using digitized microscopic images to examine the efficacy of different quantitative approaches for characterizing phytoliths. A comprehensive experiment performed on images of 429 phytoliths demonstrated that the automatic phytolith classification is a promising area of research that will help researchers to invest time more efficiently and improve their recognition accuracy rate.},
  keywords  = {Feature extraction, Machine learning, Microfossils, Morphometry, Proxy},
  pubstate  = {published},
  tppubtype = {article}
}
Juez-Gil, Mario; Saucedo-Dorantes, Juan José; Arnaiz-González, Álvar; López-Nozal, Carlos; García-Osorio, César; Lowe, David
Early and extremely early multi-label fault diagnosis in induction motors Journal Article
In: ISA Transactions, vol. 106, pp. 367-381, 2020, ISSN: 0019-0578.
Abstract | Links | BibTeX | Tags: Early detection, Load insensitive model, Multi-fault detection, Multi-label classification, Prediction at low operating frequencies, Principal component analysis, SELECTED
@article{Juez-Gil2020,
  title     = {Early and extremely early multi-label fault diagnosis in induction motors},
  author    = {Juez-Gil, Mario and Saucedo-Dorantes, Juan José and Arnaiz-González, Álvar and López-Nozal, Carlos and García-Osorio, César and Lowe, David},
  url       = {https://www.sciencedirect.com/science/article/pii/S0019057820302755},
  doi       = {10.1016/j.isatra.2020.07.002},
  issn      = {0019-0578},
  year      = {2020},
  date      = {2020-11-01},
  journal   = {ISA Transactions},
  volume    = {106},
  pages     = {367--381},
  abstract  = {The detection of faulty machinery and its automated diagnosis is an industrial priority because efficient fault diagnosis implies efficient management of the maintenance times, reduction of energy consumption, reduction in overall costs and, most importantly, the availability of the machinery is ensured. Thus, this paper presents a new intelligent multi-fault diagnosis method based on multiple sensor information for assessing the occurrence of single, combined, and simultaneous faulty conditions in an induction motor. The contribution and novelty of the proposed method include the consideration of different physical magnitudes such as vibrations, stator currents, voltages, and rotational speed as a meaningful source of information of the machine condition. Moreover, for each available physical magnitude, the reduction of the original number of attributes through the Principal Component Analysis leads to retain a reduced number of significant features that allows achieving the final diagnosis outcome by a multi-label classification tree. The effectiveness of the method was validated by using a complete set of experimental data acquired from a laboratory electromechanical system, where a healthy and seven faulty scenarios were assessed. Also, the interpretation of the results do not require any prior expert knowledge and the robustness of this proposal allows its application in industrial applications, since it may deal with different operating conditions such as different loads and operating frequencies. Finally, the performance was evaluated using multi-label measures, which to the best of our knowledge, is an innovative development in the field condition monitoring and fault identification.},
  keywords  = {Early detection, Load insensitive model, Multi-fault detection, Multi-label classification, Prediction at low operating frequencies, Principal component analysis, SELECTED},
  pubstate  = {published},
  tppubtype = {article}
}
Bustillo, Andrés; Reis, Roberto; Machado, Alisson R.; Pimenov, Danil Yurievich
Improving the accuracy of machine-learning models with data from machine test repetitions Journal Article
In: Journal of Intelligent Manufacturing, 2020, ISSN: 0956-5515.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Brandsma facing tests, Ensembles, Machine learning, Tool geometry, Turning
@article{Bustillo2020,
  title     = {Improving the accuracy of machine-learning models with data from machine test repetitions},
  author    = {Bustillo, Andrés and Reis, Roberto and Machado, Alisson R. and Pimenov, Danil Yurievich},
  url       = {https://link.springer.com/article/10.1007/s10845-020-01661-3},
  doi       = {10.1007/s10845-020-01661-3},
  issn      = {0956-5515},
  year      = {2020},
  date      = {2020-09-17},
  journal   = {Journal of Intelligent Manufacturing},
  abstract  = {The modelling of machining processes by means of machine-learning algorithms is still based on principles that are especially adapted to mechanical approaches, in which very few inputs are varied with little repetition of experimental conditions. These principles might not be ideal to achieve accurate machine-learning models and they are certainly not aligned with the practicalities of industrial machining in factories. In this research the effect of a new strategy to improve machine-learning model accuracy is studied: experimental repetition. Tool-life prediction in the face-turning operations of AISI 1045 steel discs, depending on different cooling systems and tool geometries, is selected as a case study. Both the side rake and the relief angles of HSS tools are optimized using the Brandsma facing test under dry, MQL, and flooding conditions. Different machine-learning algorithms, such as regression trees, kNNs, artificial neural networks, and ensembles (bagging and Random Forest) are tested. On the one hand, the results of the study showed that artificial neural networks of Radial Basis Functions presented the highest model accuracy (11.4 mm RMSE), but required a very sensitive and complex tuning process. On the other hand, they demonstrated that ensembles, especially Random Forest, provided models with accuracy in the same range, but with no tuning procedure (12.8 mm RMSE). Secondly, the effect of an increased dataset size, by means of experimental repetition, is evaluated and compared with traditional experimental modelling that used average values. The results showed that some machine-learning techniques, including both ensemble types, significantly improved their accuracy with this strategy, by up to 23%. The results therefore suggested that the use of raw experimental data, rather than their averaged values, can achieve machine-learning models of higher accuracy for tool-wear processes.},
  keywords  = {Artificial intelligence, Brandsma facing tests, Ensembles, Machine learning, Tool geometry, Turning},
  pubstate  = {published},
  tppubtype = {article}
}
Bustillo, Andrés; Pimenov, Danil Yurievich; Mia, Mozammel; Kapłonek, Wojciech
Machine-learning for automatic prediction of flatness deviation considering the wear of the face mill teeth Journal Article
In: Journal of Intelligent Manufacturing, 2020, ISSN: 0956-5515.
Abstract | Links | BibTeX | Tags: Cutting power, Face milling, Flatness deviation, Random forest, SMOTE, tool condition monitoring, Tool life, Wear
@article{Bustillo2020b,
  title     = {Machine-learning for automatic prediction of flatness deviation considering the wear of the face mill teeth},
  author    = {Bustillo, Andrés and Pimenov, Danil Yurievich and Mia, Mozammel and Kapłonek, Wojciech},
  url       = {https://link.springer.com/article/10.1007/s10845-020-01645-3},
  doi       = {10.1007/s10845-020-01645-3},
  issn      = {0956-5515},
  year      = {2020},
  date      = {2020-09-03},
  journal   = {Journal of Intelligent Manufacturing},
  abstract  = {The acceptance of the machined surfaces not only depends on roughness parameters but also in the flatness deviation (Δfl). Hence, before reaching the threshold of flatness deviation caused by the wear of the face mill, the tool inserts need to be changed to avoid the expected product rejection. As current CNC machines have the facility to track, in real-time, the main drive power, the present study utilizes this facility to predict the flatness deviation—with proper consideration to the amount of wear of cutting tool insert’s edge. The prediction of deviation from flatness is evaluated as a regression and a classification problem, while different machine-learning techniques like Multilayer Perceptrons, Radial Basis Functions Networks, Decision Trees and Random Forest ensembles have been examined. Finally, Random Forest ensembles combined with Synthetic Minority Over-sampling Technique (SMOTE) balancing technique showed the highest performance when the flatness levels are discretized taking into account industrial requirements. The SMOTE balancing technique resulted in a very useful strategy to avoid the strong limitations that small experiment datasets produce in the accuracy of machine-learning models.},
  keywords  = {Cutting power, Face milling, Flatness deviation, Random forest, SMOTE, tool condition monitoring, Tool life, Wear},
  pubstate  = {published},
  tppubtype = {article}
}
Rodríguez, Juan José; Juez-Gil, Mario; Arnaiz-González, Álvar; Kuncheva, Ludmila I
An experimental evaluation of mixup regression forests Journal Article
In: Expert Systems with Applications, vol. 151, no. 113376, 2020, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: Mixup, Random forest, Regression, Rotation forest, SELECTED
@article{Rodríguez2020,
  title     = {An experimental evaluation of mixup regression forests},
  author    = {Rodríguez, Juan José and Juez-Gil, Mario and Arnaiz-González, Álvar and Kuncheva, Ludmila I},
  url       = {https://www.sciencedirect.com/science/article/abs/pii/S0957417420302013},
  doi       = {10.1016/j.eswa.2020.113376},
  issn      = {0957-4174},
  year      = {2020},
  date      = {2020-08-01},
  journal   = {Expert Systems with Applications},
  volume    = {151},
  pages     = {113376},
  abstract  = {Over the past few decades, the remarkable prediction capabilities of ensemble methods have been used within a wide range of applications. Maximization of base-model ensemble accuracy and diversity are the keys to the heightened performance of these methods. One way to achieve diversity for training the base models is to generate artificial/synthetic instances for their incorporation with the original instances. Recently, the mixup method was proposed for improving the classification power of deep neural networks (Zhang, Cissé, Dauphin, and Lopez-Paz, 2017). Mixup method generates artificial instances by combining pairs of instances and their labels, these new instances are used for training the neural networks promoting its regularization. In this paper, new regression tree ensembles trained with mixup, which we will refer to as Mixup Regression Forest, are presented and tested. The experimental study with 61 datasets showed that the mixup approach improved the results of both Random Forest and Rotation Forest.},
  keywords  = {Mixup, Random forest, Regression, Rotation forest, SELECTED},
  pubstate  = {published},
  tppubtype = {article}
}
Garrido-Labrador, José Luis; Puente-Gabarri, Daniel; Ramirez-Sanz, José Miguel; Ayala-Dulanto, David; Maudes, Jesús
Using Ensembles for Accurate Modelling of Manufacturing Processes in an IoT Data-Acquisition Solution Journal Article
In: Applied Sciences, vol. 10, no. 13, 2020, ISSN: 2076-3417.
Abstract | Links | BibTeX | Tags: Ensembles, internet of things, Milling, rotation forests, unbalanced datasets
@article{Garrido-Labrador2020,
  title     = {Using Ensembles for Accurate Modelling of Manufacturing Processes in an {IoT} Data-Acquisition Solution},
  author    = {Garrido-Labrador, José Luis and Puente-Gabarri, Daniel and Ramirez-Sanz, José Miguel and Ayala-Dulanto, David and Maudes, Jesús},
  url       = {https://www.mdpi.com/2076-3417/10/13/4606/htm},
  doi       = {10.3390/app10134606},
  issn      = {2076-3417},
  year      = {2020},
  date      = {2020-07-02},
  journal   = {Applied Sciences},
  volume    = {10},
  number    = {13},
  abstract  = {The development of complex real-time platforms for the Internet of Things (IoT) opens up a promising future for the diagnosis and the optimization of machining processes. Many issues have still to be solved before IoT platforms can be profitable for small workshops with very flexible workloads and workflows. The main obstacles refer to sensor implementation, IoT architecture, and data processing, and analysis. In this research, the use of different machine-learning techniques is proposed, for the extraction of different information from an IoT platform connected to a machining center, working under real industrial conditions in a workshop. The aim is to evaluate which algorithmic technique might be the best to build accurate prediction models for one of the main demands of workshops: the optimization of machining processes. This evaluation, completed under real industrial conditions, includes very limited information on the machining workload of the machining center and unbalanced datasets. The strategy is validated for the classification of the state of a machining center, its working mode, and the prediction of the thermal evolution of the main machine-tool motors: the axis motors and the milling head motor. The results show the superiority of the ensembles for both classification problems under analysis and all four regression problems. In particular, Rotation Forest-based ensembles turned out to have the best performance in the experiments for all the metrics under study. The models are accurate enough to provide useful conclusions applicable to current industrial practice, such as improvements in machine programming to avoid cutting conditions that might greatly reduce tool lifetime and damage machine components.},
  keywords  = {Ensembles, internet of things, Milling, rotation forests, unbalanced datasets},
  pubstate  = {published},
  tppubtype = {article}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; Arnaiz-González, Álvar; Kuncheva, Ludmila I
Random Balance ensembles for multiclass imbalance learning Journal Article
In: Knowledge-Based Systems, 2020, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: Classifier ensembles, Imbalanced data, Multiclass classification, SELECTED
@article{Rodríguez2019,
  author    = {Rodríguez, Juan José and Díez-Pastor, José Francisco and Arnaiz-González, Álvar and Kuncheva, Ludmila I},
  title     = {Random Balance ensembles for multiclass imbalance learning},
  journal   = {Knowledge-Based Systems},
  year      = {2020},
  date      = {2020-04-06},
  issn      = {0950-7051},
  doi       = {10.1016/j.knosys.2019.105434},
  url       = {https://www.sciencedirect.com/science/article/pii/S0950705119306598},
  keywords  = {Classifier ensembles, Imbalanced data, Multiclass classification, SELECTED},
  abstract  = {Random Balance strategy (RandBal) has been recently proposed for constructing classifier ensembles for imbalanced, two-class data sets. In RandBal, each base classifier is trained with a sample of the data with a random class prevalence, independent of the a priori distribution. Hence, for each sample, one of the classes will be undersampled while the other will be oversampled. RandBal can be applied on its own or can be combined with any other ensemble method. One particularly successful variant is RandBalBoost which integrates Random Balance and boosting. Encouraged by the success of RandBal, this work proposes two approaches which extend RandBal to multiclass imbalance problems. Multiclass imbalance implies that at least two classes have substantially different proportion of instances. In the first approach proposed here, termed Multiple Random Balance (MultiRandBal), we deal with all classes simultaneously. The training data for each base classifier are sampled with random class proportions. The second approach we propose decomposes the multiclass problem into two-class problems using one-vs-one or one-vs-all, and builds an ensemble of RandBal ensembles. We call the two versions of the second approach OVO-RandBal and OVA-RandBal, respectively. These two approaches were chosen because they are the most straightforward extensions of RandBal for multiple classes. Our main objective is to evaluate both approaches for multiclass imbalanced problems. To this end, an experiment was carried out with 52 multiclass data sets. The results suggest that both MultiRandBal, and OVO/OVA-RandBal are viable extensions of the original two-class RandBal. Collectively, they consistently outperform acclaimed state-of-the art methods for multiclass imbalanced problems.},
  pubstate  = {published},
  tppubtype = {article}
}
2019
Checa, David; Bustillo, Andrés
A review of immersive virtual reality serious games to enhance learning and training Journal Article
In: Multimedia Tools and Applications, pp. 1-21, 2019, ISSN: 1380-7501.
Abstract | Links | BibTeX | Tags: Evaluation, Learning, SELECTED, Serious Game, Systematic Literature Review, Virtual Reality
@article{Checa2019b,
  title     = {A review of immersive virtual reality serious games to enhance learning and training},
  author    = {Checa, David and Bustillo, Andrés},
  url       = {https://link.springer.com/article/10.1007/s11042-019-08348-9},
  doi       = {10.1007/s11042-019-08348-9},
  issn      = {1380-7501},
  year      = {2019},
  date      = {2019-12-05},
  journal   = {Multimedia Tools and Applications},
  pages     = {1--21},
  abstract  = {The merger of game-based approaches and Virtual Reality (VR) environments that can enhance learning and training methodologies have a very promising future, reinforced by the widespread market-availability of affordable software and hardware tools for VR-environments. Rather than passive observers, users engage in those learning environments as active participants, permitting the development of exploration-based learning paradigms. There are separate reviews of VR technologies and serious games for educational and training purposes with a focus on only one knowledge area. However, this review covers 135 proposals for serious games in immersive VR-environments that are combinations of both VR and serious games and that offer end-user validation. First, an analysis of the forum, nationality, and date of publication of the articles is conducted. Then, the application domains, the target audience, the design of the game and its technological implementation, the performance evaluation procedure, and the results are analyzed. The aim here is to identify the factual standards of the proposed solutions and the differences between training and learning applications. Finally, the study lays the basis for future research lines that will develop serious games in immersive VR-environments, providing recommendations for the improvement of these tools and their successful application for the enhancement of both learning and training tasks.},
  keywords  = {Evaluation, Learning, SELECTED, Serious Game, Systematic Literature Review, Virtual Reality},
  pubstate  = {published},
  tppubtype = {article}
}
Kordos, Mirosław; Arnaiz-González, Álvar; García-Osorio, César
Evolutionary prototype selection for multi-output regression Journal Article
In: Neurocomputing, vol. 358, pp. 309-320, 2019, ISSN: 0925-2312.
Abstract | Links | BibTeX | Tags: Multi-output, Multi-target, Prototype selection, Regression, SELECTED
@article{Kordos2019,
  title     = {Evolutionary prototype selection for multi-output regression},
  author    = {Kordos, Mirosław and Arnaiz-González, Álvar and García-Osorio, César},
  url       = {https://www.sciencedirect.com/science/article/pii/S0925231219307611},
  doi       = {10.1016/j.neucom.2019.05.055},
  issn      = {0925-2312},
  year      = {2019},
  date      = {2019-09-17},
  journal   = {Neurocomputing},
  volume    = {358},
  pages     = {309--320},
  abstract  = {A novel approach to prototype selection for multi-output regression data sets is presented. A multi-objective evolutionary algorithm is used to evaluate the selections using two criteria: training data set compression and prediction quality expressed in terms of root mean squared error. A multi-target regressor based on k-NN was used for that purpose during the training to evaluate the error, while the tests were performed using four different multi-target predictive models. The distance matrices used by the multi-target regressor were cached to accelerate operational performance. Multiple Pareto fronts were also used to prevent overfitting and to obtain a broader range of solutions, by using different probabilities in the initialization of populations and different evolutionary parameters in each one. The results obtained with the benchmark data sets showed that the proposed method greatly reduced data set size and, at the same time, improved the predictive capabilities of the multi-output regressors trained on the reduced data set.},
  keywords  = {Multi-output, Multi-target, Prototype selection, Regression, SELECTED},
  pubstate  = {published},
  tppubtype = {article}
}
Juez-Gil, Mario; Erdakov, Ivan Nikolaevich; Bustillo, Andrés; Pimenov, Danil Yurievich
A regression-tree multilayer-perceptron hybrid strategy for the prediction of ore crushing-plate lifetimes Journal Article
In: Journal of Advanced Research, vol. July 2019, no. 18, pp. 173-184, 2019, ISSN: 2090-1232.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Hadfield steel, Lifetime prediction, Multi-layer perceptrons, Regression trees, Resource savings
@article{Juez-Gil2019,
  title     = {A regression-tree multilayer-perceptron hybrid strategy for the prediction of ore crushing-plate lifetimes},
  author    = {Juez-Gil, Mario and Erdakov, Ivan Nikolaevich and Bustillo, Andrés and Pimenov, Danil Yurievich},
  doi       = {10.1016/j.jare.2019.03.008},
  issn      = {2090-1232},
  year      = {2019},
  date      = {2019-07-01},
  journal   = {Journal of Advanced Research},
  volume    = {18},
  pages     = {173--184},
  abstract  = {Highly tensile manganese steel is in great demand owing to its high tensile strength under shock loads. All workpieces are produced through casting, because it is highly difficult to machine. The probabilistic aspects of its casting, its variable composition, and the different casting techniques must all be considered for the optimisation of its mechanical properties. A hybrid strategy is therefore proposed which combines decision trees and artificial neural networks (ANNs) for accurate and reliable prediction models for ore crushing plate lifetimes. The strategic blend of these two high-accuracy prediction models is used to generate simple decision trees which can reveal the main dataset features, thereby facilitating decision-making. Following a complexity analysis of a dataset with 450 different plates, the best model consisted of 9 different multilayer perceptrons, the inputs of which were only the Fe and Mn plate compositions. The model recorded a low root mean square error (RMSE) of only 0.0614 h for the lifetime of the plate: a very accurate result considering their varied lifetimes of between 746 and 6902 h in the dataset. Finally, the use of these models under real industrial conditions is presented in a heat map, namely a 2D representation of the main manufacturing process inputs with a colour scale which shows the predicted output, i.e. the expected lifetime of the manufactured plates. Thus, the hybrid strategy extracts core training dataset information in high-accuracy prediction models. This novel strategy merges the different capabilities of two families of machine-learning algorithms. It provides a high-accuracy industrial tool for the prediction of the full lifetime of highly tensile manganese steel plates. The results yielded a precision prediction of (RMSE of 0.061 h) for the full lifetime of (light, medium, and heavy) crusher plates manufactured with the three (experimental, classic, and highly efficient (new)) casting methods.},
  keywords  = {Artificial intelligence, Hadfield steel, Lifetime prediction, Multi-layer perceptrons, Regression trees, Resource savings},
  pubstate  = {published},
  tppubtype = {article}
}
Kuncheva, Ludmila I; Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Gunn, Iain A D
Instance selection improves geometric mean accuracy: a study on imbalanced data classification Journal Article
In: Progress in Artificial Intelligence, vol. 8, no. 2, pp. 215-228, 2019, ISSN: 2192-6352.
Abstract | Links | BibTeX | Tags: Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour, Theoretical perspective
@article{Kuncheva2019,
  title     = {Instance selection improves geometric mean accuracy: a study on imbalanced data classification},
  author    = {Kuncheva, Ludmila I and Arnaiz-González, Álvar and Díez-Pastor, José Francisco and Gunn, Iain A D},
  url       = {https://link.springer.com/article/10.1007/s13748-019-00172-4},
  doi       = {10.1007/s13748-019-00172-4},
  issn      = {2192-6352},
  year      = {2019},
  date      = {2019-06-01},
  journal   = {Progress in Artificial Intelligence},
  volume    = {8},
  number    = {2},
  pages     = {215--228},
  abstract  = {A natural way of handling imbalanced data is to attempt to equalise the class frequencies and train the classifier of choice on balanced data. For two-class imbalanced problems, the classification success is typically measured by the geometric mean (GM) of the true positive and true negative rates. Here we prove that GM can be improved upon by instance selection, and give the theoretical conditions for such an improvement. We demonstrate that GM is non-monotonic with respect to the number of retained instances, which discourages systematic instance selection. We also show that balancing the distribution frequencies is inferior to a direct maximisation of GM. To verify our theoretical findings, we carried out an experimental study of 12 instance selection methods for imbalanced data, using 66 standard benchmark data sets. The results reveal possible room for new instance selection methods for imbalanced data.},
  keywords  = {Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour, Theoretical perspective},
  pubstate  = {published},
  tppubtype = {article}
}
Alonso-Abad, Jesús M.; López-Nozal, Carlos; Maudes-Raedo, Jesús; Marticorena-Sánchez, Raúl
Label prediction on issue tracking systems using text mining Journal Article
In: Progress in Artificial Intelligence, pp. 1-18, 2019, ISSN: 2192-6360.
Abstract | Links | BibTeX | Tags: Experimentation in software engineering, Issue tracker system, Label prediction, Text classifier, Text mining
@article{Alonso-Abad2019,
  title     = {Label prediction on issue tracking systems using text mining},
  author    = {Alonso-Abad, Jesús M. and López-Nozal, Carlos and Maudes-Raedo, Jesús and Marticorena-Sánchez, Raúl},
  url       = {https://link.springer.com/article/10.1007/s13748-019-00182-2},
  doi       = {10.1007/s13748-019-00182-2},
  issn      = {2192-6360},
  year      = {2019},
  date      = {2019-03-28},
  journal   = {Progress in Artificial Intelligence},
  pages     = {1--18},
  abstract  = {Issue tracking systems are overall change-management tools in software development. The issue-solving life cycle is a complex socio-technical activity that requires team discussion and knowledge sharing between members. In that process, issue classification facilitates an understanding of issues and their analysis. Issue tracking systems permit the tagging of issues with default labels (e.g., bug, enhancement) or with customized team labels (e.g., test failures, performance). However, a current problem is that many issues in open-source projects remain unlabeled. The aim of this paper is to improve maintenance tasks in development teams, evaluating models that can suggest a label for an issue using its text comments. We analyze data on issues from several GitHub trending projects, first by extracting issue information and then by applying text mining classifiers (i.e., support vector machine and naive Bayes multinomial). The results suggest that very suitable classifiers may be obtained to label the issues or, at least, to suggest the most suitable candidate labels.},
  keywords  = {Experimentation in software engineering, Issue tracker system, Label prediction, Text classifier, Text mining},
  pubstate  = {published},
  tppubtype = {article}
}
Beranoagirre, Aitor; Urbikain, Gorka; Marticorena-Sánchez, Raúl; Bustillo, Andrés; López de Lacalle, Luis Norberto
Sensitivity Analysis of Tool Wear in Drilling of Titanium Aluminides Journal Article
In: Metals, vol. 9, no. 3, pp. 297, 2019, ISSN: 2075-4701.
Abstract | Links | BibTeX | Tags: difficult-to-cut materials, drilling, gamma TiAl, titanium aluminides
@article{Beranoagirre2019,
title = {Sensitivity Analysis of Tool Wear in Drilling of Titanium Aluminides},
author = {Aitor Beranoagirre and Gorka Urbikain and Raúl Marticorena-Sánchez and Andrés Bustillo and {López de Lacalle}, Luis Norberto},
doi = {10.3390/met9030297},
issn = {2075-4701},
year = {2019},
date = {2019-03-06},
journal = {Metals},
volume = {9},
number = {3},
pages = {297},
abstract = {In the aerospace industry, a large number of holes need to be drilled to mechanically connect the components of aircraft engines. The working conditions for such components demand a good response of their mechanical properties at high temperatures. The new gamma TiAl are in the transition between the 2nd and 3rd generation, and several applications are proposed for that sector. Thus, NASA is proposing the use of the alloys in the Revolutionary Turbine Accelerator/Turbine-Based Combined Cycle (RTA/TBCC) Program for the next-generation launch vehicle, with gamma TiAl as a potential compressor and structural material. However, the information and datasets available regarding cutting performance in titanium aluminides are relatively scarce. So, a considerable part of the current research efforts in this field is dedicated to process optimization of cutting parameters and tool geometries. The present work is framed in the study of wear when machining holes in these difficult-to-cut alloys. In particular, the work presents the results from drilling tests on three types of gamma TiAl alloys, extruded MoCuSi, ingot MoCuSi, and TNB type, to define an optimal set of cutting parameters. Maintaining uniform, gradual wear is key to avoiding tool breakage and enabling good hole dimensional accuracy. So, this paper proposes a model based on ANOVA analysis to identify the relationships between cutting conditions and resulting wear and estimate tool life. The best cutting parameters were found at vc = 10–15 m/min and fn = 0.025 mm/rev.},
keywords = {difficult-to-cut materials, drilling, gamma TiAl, titanium aluminides},
pubstate = {published},
tppubtype = {article}
}
Pimenov, Danil Yurievich; Hassui, Amauri; Wojciechowski, Szymon; Mia, Mozammel; Magri, Aristides; Suyama, Daniel I.; Bustillo, Andrés; Krolczyk, Grzegorz; Gupta, Munish Kumar
Effect of the Relative Position of the Face Milling Tool towards the Workpiece on Machined Surface Roughness and Milling Dynamics Journal Article
In: Applied Sciences, vol. 9, no. 5, pp. 842, 2019, ISSN: 2076-3417.
Abstract | Links | BibTeX | Tags: acceleration, cutting force, Face milling, relative position, surface roughness
@article{Pimenov2019,
  author     = {Danil Yurievich Pimenov and Amauri Hassui and Szymon Wojciechowski and Mozammel Mia and Aristides Magri and Daniel I. Suyama and Andrés Bustillo and Grzegorz Krolczyk and Munish Kumar Gupta},
  title      = {Effect of the Relative Position of the Face Milling Tool towards the Workpiece on Machined Surface Roughness and Milling Dynamics},
  journal    = {Applied Sciences},
  volume     = {9},
  number     = {5},
  pages      = {842},
  year       = {2019},
  date       = {2019-02-27},
  doi        = {10.3390/app9050842},
  issn       = {2076-3417},
  abstract   = {In face milling one of the most important parameters of the process quality is the roughness of the machined surface. In many articles, the influence of cutting regimes on the roughness and cutting forces of face milling is considered. However, during flat face milling with the milling width B lower than the cutter’s diameter D, the influence of such an important parameter as the relative position of the face mill towards the workpiece and the milling kinematics (Up or Down milling) on the cutting force components and the roughness of the machined surface has not been sufficiently studied. At the same time, the values of the cutting force components can vary significantly depending on the relative position of the face mill towards the workpiece, and thus have a different effect on the power expended on the milling process. Having studied this influence, it is possible to formulate useful recommendations for a technologist who creates a technological process using face milling operations. It is possible to choose such a relative position of the face mill and workpiece that will provide the smallest value of the surface roughness obtained by face milling. This paper shows the influence of the relative position of the face mill towards the workpiece and milling kinematics on the components of the cutting forces, the acceleration of the machine spindle in the process of face milling (considering the rotation of the mill for a full revolution), and on the surface roughness obtained by face milling. Practical recommendations on the assignment of the relative position of the face mill towards the workpiece and the milling kinematics are given.},
  keywords   = {acceleration, cutting force, Face milling, relative position, surface roughness},
  pubstate   = {published},
  tppubtype  = {article}
}
Faithfull, William J; Rodríguez, Juan José; Kuncheva, Ludmila I
Combining univariate approaches for ensemble change detection in multivariate data Journal Article
In: Information Fusion, vol. 45, pp. 202-214, 2019, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Change detection, Ensemble methods, Multivariate data, SELECTED
@article{Faithfull2019,
title = {Combining univariate approaches for ensemble change detection in multivariate data},
author = {William J Faithfull and Juan José Rodríguez and Ludmila I Kuncheva},
url = {https://www.sciencedirect.com/science/article/pii/S1566253517301239},
doi = {10.1016/j.inffus.2018.02.003},
issn = {1566-2535},
year = {2019},
date = {2019-01-01},
journal = {Information Fusion},
volume = {45},
pages = {202--214},
abstract = {Detecting change in multivariate data is a challenging problem, especially when class labels are not available. There is a large body of research on univariate change detection, notably in control charts developed originally for engineering applications. We evaluate univariate change detection approaches —including those in the MOA framework — built into ensembles where each member observes a feature in the input space of an unsupervised change detection problem. We present a comparison between the ensemble combinations and three established ‘pure’ multivariate approaches over 96 data sets, and a case study on the KDD Cup 1999 network intrusion detection dataset. We found that ensemble combination of univariate methods consistently outperformed multivariate methods on the four experimental metrics.},
keywords = {Change detection, Ensemble methods, Multivariate data, SELECTED},
pubstate = {published},
tppubtype = {article}
}
2018
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Study of data transformation techniques for adapting single-label prototype selection algorithms to multi-label learning Journal Article
In: Expert Systems with Applications, vol. 109, pp. 114-130, 2018, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: Binary relevance, Label powerset, Multi-label classification, Prototype selection, RAkEL
@article{Arnaiz-González2018,
title = {Study of data transformation techniques for adapting single-label prototype selection algorithms to multi-label learning},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
url = {https://www.sciencedirect.com/science/article/pii/S0957417418303087},
doi = {10.1016/j.eswa.2018.05.017},
issn = {0957-4174},
year = {2018},
date = {2018-11-01},
journal = {Expert Systems with Applications},
volume = {109},
pages = {114--130},
abstract = {In this paper, the focus is on the application of prototype selection to multi-label data sets as a preliminary stage in the learning process. There are two general strategies when designing Machine Learning algorithms that are capable of dealing with multi-label problems: data transformation and method adaptation. These strategies have been successfully applied in obtaining classifiers and regressors for multi-label learning. Here we investigate the feasibility of data transformation in obtaining prototype selection algorithms for multi-label data sets from three prototype selection algorithms for single-label. The data transformation methods used were: binary relevance, dependent binary relevance, label powerset, and random k-labelsets. The general conclusion is that the methods of prototype selection obtained using data transformation are not better than those obtained through method adaptation. Moreover, prototype selection algorithms designed for multi-label do not do an entirely satisfactory job, because, although they reduce the size of the data set, without affecting significantly the accuracy, the classifier trained with the reduced data set does not improve the accuracy of the classifier when it is trained with the whole data set.},
keywords = {Binary relevance, Label powerset, Multi-label classification, Prototype selection, RAkEL},
pubstate = {published},
tppubtype = {article}
}
Oleaga, Ibone; Pardo, Carlos; Zulaika, Juan J; Bustillo, Andres
A machine-learning based solution for chatter prediction in heavy-duty milling machines Journal Article
In: Measurement, vol. 128, pp. 34 - 44, 2018, ISSN: 0263-2241.
Abstract | Links | BibTeX | Tags: Chatter, Milling, Polar diagrams, Random forest, Regression trees, Vibrations
@article{OLEAGA201834,
title = {A machine-learning based solution for chatter prediction in heavy-duty milling machines},
author = {Ibone Oleaga and Carlos Pardo and Juan J Zulaika and Andres Bustillo},
url = {http://www.sciencedirect.com/science/article/pii/S0263224118305542},
doi = {10.1016/j.measurement.2018.06.028},
issn = {0263-2241},
year = {2018},
date = {2018-11-01},
journal = {Measurement},
volume = {128},
pages = {34--44},
abstract = {The main productivity constraints of milling operations are self-induced vibrations, especially regenerative chatter vibrations. Two key parameters are linked to these vibrations: the depth of cut achievable without vibrations and the chatter frequency. Both parameters are linked to the dynamics of machine component excitation and the milling operation parameters. Their identification in any cutting direction in milling machine operations requires complex analytical models and mechatronic simulations, usually only applied to identify the worst cutting conditions in operating machines. This work proposes the use of machine learning techniques with no need to calculate the two above-mentioned parameters by means of a 3-step strategy. The strategy combines: 1) experimental frequency responses collected at the tool center point; 2) analytical calculations of both parameters; and, 3) different machine learning techniques. The results of these calculations can then be used to predict chatter under different combinations of milling directions and machine positions. This strategy is validated with real experiments on a bridge milling machine performing concordance roughing operations on AISI 1045 steel with a 125 mm diameter mill fitted with nine cutters at 45°, the results of which have confirmed the high variability of both parameters along the working volume. The following regression techniques are tested: artificial neural networks, regression trees and Random Forest. The results show that Random Forest ensembles provided the highest accuracy with a statistical advantage over the other machine learning models; they achieved a final accuracy of 0.95 mm for the critical depth and 7.3 Hz for the chatter frequency (RMSE) in the whole working volume and in all feed directions, applying a 10 × 10 cross validation scheme. 
These RMSE values are acceptable from the industrial point of view, taking into account that the critical depth of this range varies between 0.68 mm and 19.20 mm and the chatter frequency between 1.14 Hz and 65.25 Hz. Besides, Random Forest ensembles are more easily optimized than artificial neural networks (1 parameter configuration versus 210 MLPs). Additionally, tools that incorporate regression trees are interesting and highly accurate, providing immediately accessible and useful information in visual formats on critical machine performance for the design engineer.},
keywords = {Chatter, Milling, Polar diagrams, Random forest, Regression trees, Vibrations},
pubstate = {published},
tppubtype = {article}
}
Checa, David; Zulaika, Juan J; Lazkanotegi, Iñigo; Bustillo, Andrés
Optimización del proceso de mecanizado de grandes piezas de fundición mediante la monitorización remota y la visualización 3D Journal Article
In: DYNA Ingeniería e Industria, vol. 93, no. 1, pp. 668–674, 2018, ISSN: 19891490.
Abstract | Links | BibTeX | Tags: Indicadores claves de rendimiento, Máquina herramienta, Optimización del mecanizado, Realidad Virtual
@article{CHECA2018,
title = {Optimización del proceso de mecanizado de grandes piezas de fundición mediante la monitorización remota y la visualización 3D},
author = {David Checa and Juan J Zulaika and Iñigo Lazkanotegi and Andrés Bustillo},
url = {http://www.revistadyna.com/Articulos/Ficha.aspx?IdMenu=a5c9d895-28e0-4f92-b0c2-c0f86f2a940b&Cod=8816&Idioma=es-ES},
doi = {10.6036/8816},
issn = {1989-1490},
year = {2018},
date = {2018-11-01},
journal = {DYNA Ingeniería e Industria},
volume = {93},
number = {1},
pages = {668--674},
abstract = {El desarrollo en los últimos años de distintas tecnologías englobadas en el paradigma Industria 4.0 abre la puerta a la monitorización intensiva de las máquinas herramienta. En este trabajo se presenta una plataforma de adquisición y monitorización tanto 2D como 3D del funcionamiento de máquinas-herramienta que busca facilitar la toma de decisiones para la optimización de la producción. Esta plataforma está compuesta por: 1) un sistema de adquisición de datos que procesa la información recopilada por el PLC y el CNC de la máquina y por cualquier otro sensor integrado en la misma, 2) un servidor remoto que guarda los datos recogidos y 3) un conjunto de interfaces 2D y 3D que permiten tanto calcular indicadores claves de rendimiento en tiempo real como analizar un proceso concreto de mecanizado en un entorno virtual 3D mediante Oculus Rift y Oculus Touch para detectar anomalías en el proceso de mecanizado. El funcionamiento de esta plataforma se ha validado en una fresadora de pórtico que realiza el mecanizado de una pieza de fundición de grandes dimensiones. El resultado de este estudio muestra cómo se pueden detectar tres tipos de anomalías en el proceso de mecanizado y cómo el entorno inmersivo 3D facilita que el ingeniero de proceso detecte estas anomalías, en especial en el caso de ingenieros de proceso junior.},
keywords = {Indicadores claves de rendimiento, Máquina herramienta, Optimización del mecanizado, Realidad Virtual},
pubstate = {published},
tppubtype = {article}
}
Bustillo, A; Pimenov, D. Yu.; Matuszewski, M; Mikolajczyk, T
Using artificial intelligence models for the prediction of surface wear based on surface isotropy levels Journal Article
In: Robotics and Computer-Integrated Manufacturing, vol. 53, pp. 215 - 227, 2018, ISSN: 0736-5845.
Abstract | Links | BibTeX | Tags: Ensembles, Isotropy level geometric structure of the surface, Roughness, Small size dataset, Wear
@article{BUSTILLO2018215,
title = {Using artificial intelligence models for the prediction of surface wear based on surface isotropy levels},
author = {A Bustillo and D. Yu. Pimenov and M Matuszewski and T Mikolajczyk},
url = {http://www.sciencedirect.com/science/article/pii/S0736584517303733},
doi = {10.1016/j.rcim.2018.03.011},
issn = {0736-5845},
year = {2018},
date = {2018-10-01},
journal = {Robotics and Computer-Integrated Manufacturing},
volume = {53},
pages = {215--227},
abstract = {Currently, a key industrial challenge in friction processes is the prediction of surface roughness and loss of mass under different machining processes, such as Electro-Discharge Machining (EDM), and turning and grinding processes. Under industrial conditions, only the sliding distance is easily evaluated in friction processes, while the acquisition of other variables usually implies expensive costs for production centres, such as the integration of sensors in functioning machine-tools. Besides, appropriate datasets are usually very small, because the testing of different friction conditions is also expensive. These two restrictions, small datasets and very few inputs, make it very difficult to use Artificial Intelligence (AI) techniques to model the industrial problem. So, the use of the isotropy level of the surface structure is proposed, as another input that is easily evaluated prior to the friction process. In this example, the friction processes of a cubic sample of 102Cr6 (40 HRC) steel and a further element made of X210Cr12 (60 HRC) steel are considered. Different artificial intelligence techniques, such as artificial regression trees, multilayer perceptrons (MLPs), radial basis networks (RBFs), and Random Forest, were tested considering the isotropy level as either a nominal or a numeric attribute, to evaluate improvements in the accuracy of surface roughness and loss-of-mass predictions. The results obtained with real datasets showed that RBFs and MLPs provided the most accurate models for loss of mass and surface roughness prediction, respectively. MLPs have slightly higher surface prediction accuracy than Random Forest, although MLP models are very sensitive to the tuning of their parameters (a small mismatch between the learning rate and the momentum in the MLP will drastically reduce the accuracy of the model). 
In contrast, Random Forest has no parameter to be tuned and its prediction is almost as good as MLPs for surface roughness, so Random Forest will be more suitable for industrial use where no expert in AI model tuning is available. Moreover, the inclusion of the isotropy level in the dataset, especially as a numeric attribute, greatly improved the accuracy of the models, in some cases, by up to 52% for MLPs, and by a smaller proportion of 16% in the Random Forest models in terms of Root Mean Square Error. Finally, Random Forest ensembles only trained with low and very high isotropy level experimental datasets generated reliable models for medium levels of isotropy, thereby offering a solution to reduce the size of training datasets.},
keywords = {Ensembles, Isotropy level geometric structure of the surface, Roughness, Small size dataset, Wear},
pubstate = {published},
tppubtype = {article}
}
Kuncheva, Ludmila I; Rodríguez, Juan José
On feature selection protocols for very low-sample-size data Journal Article
In: Pattern Recognition, vol. 81, pp. 660-673, 2018, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: Cross-validation, Experimental protocol, Feature selection, SELECTED, Training/testing, Wide datasets
@article{Kuncheva2018b,
title = {On feature selection protocols for very low-sample-size data},
author = {Ludmila I Kuncheva and Juan José Rodríguez},
url = {https://www.sciencedirect.com/science/article/pii/S003132031830102X},
doi = {10.1016/j.patcog.2018.03.012},
issn = {0031-3203},
year = {2018},
date = {2018-09-01},
journal = {Pattern Recognition},
volume = {81},
pages = {660--673},
abstract = {High-dimensional data with very few instances are typical in many application domains. Selecting a highly discriminative subset of the original features is often the main interest of the end user. The widely-used feature selection protocol for such type of data consists of two steps. First, features are selected from the data (possibly through cross-validation), and, second, a cross-validation protocol is applied to test a classifier using the selected features. The selected feature set and the testing accuracy are then returned to the user. For the lack of a better option, the same low-sample-size dataset is used in both steps. Questioning the validity of this protocol, we carried out an experiment using 24 high-dimensional datasets, three feature selection methods and five classifier models. We found that the accuracy returned by the above protocol is heavily biased, and therefore propose an alternative protocol which avoids the contamination by including both steps in a single cross-validation loop. Statistical tests verify that the classification accuracy returned by the proper protocol is significantly closer to the true accuracy (estimated from an independent testing set) compared to that returned by the currently favoured protocol.},
keywords = {Cross-validation, Experimental protocol, Feature selection, SELECTED, Training/testing, Wide datasets},
pubstate = {published},
tppubtype = {article}
}
Bustillo, Andres; Urbikain, Gorka; Perez, Jose M; Pereira, Octavio M; López de Lacalle, Luis Norberto
Smart optimization of a friction-drilling process based on boosting ensembles Journal Article
In: Journal of Manufacturing Systems, 2018, ISSN: 0278-6125.
Abstract | Links | BibTeX | Tags: Boosting, Ensembles, Friction drilling, Gap prediction, Small-size dataset
@article{BUSTILLO2018b,
title = {Smart optimization of a friction-drilling process based on boosting ensembles},
author = {Andres Bustillo and Gorka Urbikain and Jose M Perez and Octavio M Pereira and {López de Lacalle}, Luis Norberto},
url = {http://www.sciencedirect.com/science/article/pii/S0278612518301249},
doi = {10.1016/j.jmsy.2018.06.004},
issn = {0278-6125},
year = {2018},
date = {2018-08-16},
journal = {Journal of Manufacturing Systems},
abstract = {Form and friction drilling techniques are now promising alternatives in light and medium boilermaking that will very probably supersede conventional drilling techniques, as rapid and economic solutions for producing nutless bolted joints. Nonetheless, given the number of cutting parameters involved, optimization of the process requires calibration of the main input parameters in relation to the desired output values. Among these values, the gap between plates determines the service life of the joint. In this paper, a suitable smart manufacturing strategy for real industrial conditions is proposed, where it is necessary to identify the most accurate machine-learning technique to process experimental datasets of a small size. The strategy is first to generate a small-size dataset under real industrial conditions, then the gap is discretized taking into account the specific industrial needs of this quality indicator for each product. Finally, the different machine learning models are tested and fine-tuned to ascertain the most accurate model at the lowest cost. The strategy is validated with a 48 condition-dataset where only feed-rate and rotation speed are used as inputs and the gap as the output. The results on this dataset showed that the Adaboost ensembles provided the highest accuracy and were more easily optimized than artificial neural networks.},
keywords = {Boosting, Ensembles, Friction drilling, Gap prediction, Small-size dataset},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Local sets for multi-label instance selection Journal Article
In: Applied Soft Computing, vol. 68, pp. 651-666, 2018, ISSN: 1568-4946.
Abstract | Links | BibTeX | Tags: Data reduction, Instance selection, Local set, Multi-label classification, Nearest neighbor, SELECTED
@article{Arnaiz-González2018b,
title = {Local sets for multi-label instance selection},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
url = {https://www.sciencedirect.com/science/article/pii/S1568494618302072},
doi = {10.1016/j.asoc.2018.04.016},
issn = {1568-4946},
year = {2018},
date = {2018-07-01},
journal = {Applied Soft Computing},
volume = {68},
pages = {651--666},
abstract = {The multi-label classification problem is an extension of traditional (single-label) classification, in which the output is a vector of values rather than a single categorical value. The multi-label problem is therefore a very different and much more challenging one than the single-label problem. Recently, multi-label classification has attracted interest, because of its real-life applications, such as image recognition, bio-informatics, and text categorization, among others. Unfortunately, there are few instance selection techniques capable of processing the data used for these applications. These techniques are also very useful for cleaning and reducing the size of data sets.
In single-label problems, the local set of an instance x comprises all instances in the largest hypersphere centered on x, so that they are all of the same class. This concept has been successfully integrated in the design of Iterative Case Filtering, one of the most influential instance selection methods in single-label learning. Unfortunately, the concept that was originally defined for single-label learning cannot be directly applied to multi-label data, as each instance has more than one label.
An adaptation of the local set concept to multi-label data is proposed in this paper and its effectiveness is verified in the design of two new algorithms that yielded competitive results. One of the adaptations cleans the data sets, to improve their predictive capabilities, while the other aims to reduce data set sizes. Both are tested and compared against the state-of-the-art instance selection methods available for multi-label learning.},
keywords = {Data reduction, Instance selection, Local set, Multi-label classification, Nearest neighbor, SELECTED},
pubstate = {published},
tppubtype = {article}
}
In single-label problems, the local set of an instance x comprises all instances in the largest hypersphere centered on x, so that they are all of the same class. This concept has been successfully integrated in the design of Iterative Case Filtering, one of the most influential instance selection methods in single-label learning. Unfortunately, the concept that was originally defined for single-label learning cannot be directly applied to multi-label data, as each instance has more than one label.
An adaptation of the local set concept to multi-label data is proposed in this paper and its effectiveness is verified in the design of two new algorithms that yielded competitive results. One of the adaptations cleans the data sets, to improve their predictive capabilities, while the other aims to reduce data set sizes. Both are tested and compared against the state-of-the-art instance selection methods available for multi-label learning.
Pimenov, D. Yu.; Bustillo, A; Mikolajczyk, T
Artificial intelligence for automatic prediction of required surface roughness by monitoring wear on face mill teeth Journal Article
In: Journal of Intelligent Manufacturing, vol. 29, no. 5, pp. 1045–1061, 2018, ISSN: 1572-8145.
Abstract | Links | BibTeX | Tags: Cutting power, Face milling Wear, Processing time, Random forest, surface roughness
@article{Pimenov2018,
title = {Artificial intelligence for automatic prediction of required surface roughness by monitoring wear on face mill teeth},
author = {D. Yu. Pimenov and A Bustillo and T Mikolajczyk},
url = {https://doi.org/10.1007/s10845-017-1381-8},
doi = {10.1007/s10845-017-1381-8},
issn = {1572-8145},
year = {2018},
date = {2018-06-01},
journal = {Journal of Intelligent Manufacturing},
volume = {29},
number = {5},
pages = {1045--1061},
abstract = {Nowadays, face milling is one of the most widely used machining processes for the generation of flat surfaces. Following international standards, the quality of a machined surface is measured in terms of surface roughness, Ra, a parameter that will decrease with increased tool wear. So, cutting inserts of the milling tool have to be changed before a given surface quality threshold is exceeded. The use of artificial intelligence methods is suggested in this paper for real-time prediction of surface roughness deviations, depending on the main drive power, and taking tool wear, $V_B$, into account. This method ensures comprehensive use of the potential of modern CNC machines that are able to monitor the main drive power, N, in real-time. It can likewise estimate the three parameters -maximum tool wear, machining time, and cutting power- that are required to generate a given surface roughness, thereby making the most efficient use of the cutting tool. A series of artificial intelligence methods are tested: random forest (RF), standard Multilayer perceptrons (MLP), Regression Trees, and radial-based functions. Random forest was shown to have the highest model accuracy, followed by regression trees, displaying higher accuracy than the standard MLP and the radial-basis function. Moreover, RF techniques are easily tuned and generate visual information for direct use by the process engineer, such as the linear relationships between process parameters and roughness, and thresholds for avoiding rapid tool wear. All of this information can be directly extracted from the tree structure or by drawing 3D charts plotting two process inputs and the predicted roughness depending on workshop requirements.},
keywords = {Cutting power, Face milling, Wear, Processing time, Random forest, surface roughness},
pubstate = {published},
tppubtype = {article}
}
Mikołajczyk, T; Nowicki, K; Bustillo, A; Pimenov, D. Yu.
Predicting tool life in turning operations using neural networks and image processing Journal Article
In: Mechanical Systems and Signal Processing, vol. 104, pp. 503 - 513, 2018, ISSN: 0888-3270.
Abstract | Links | BibTeX | Tags: Image analysis, Neural networks, Tool life prediction, Tool wear
@article{MIKOLAJCZYK2018503,
title = {Predicting tool life in turning operations using neural networks and image processing},
author = {T Mikołajczyk and K Nowicki and A Bustillo and D. Yu. Pimenov},
url = {http://www.sciencedirect.com/science/article/pii/S088832701730599X},
doi = {10.1016/j.ymssp.2017.11.022},
issn = {0888-3270},
year = {2018},
date = {2018-05-01},
journal = {Mechanical Systems and Signal Processing},
volume = {104},
pages = {503--513},
abstract = {A two-step method is presented for the automatic prediction of tool life in turning operations. First, experimental data are collected for three cutting edges under the same constant processing conditions. In these experiments, the parameter of tool wear, VB, is measured with conventional methods and the same parameter is estimated using Neural Wear, a customized software package that combines flank wear image recognition and Artificial Neural Networks (ANNs). Second, an ANN model of tool life is trained with the data collected from the first two cutting edges and the subsequent model is evaluated on two different subsets for the third cutting edge: the first subset is obtained from the direct measurement of tool wear and the second is obtained from the Neural Wear software that estimates tool wear using edge images. Although the complete-automated solution, Neural Wear software for tool wear recognition plus the ANN model of tool life prediction, presented a slightly higher error than the direct measurements, it was within the same range and can meet all industrial requirements. These results confirm that the combination of image recognition software and ANN modelling could potentially be developed into a useful industrial tool for low-cost estimation of tool life in turning operations.},
keywords = {Image analysis, Neural networks, Tool life prediction, Tool wear},
pubstate = {published},
tppubtype = {article}
}
Kuncheva, Ludmila I; Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Gunn, Iain A D
Instance Selection Improves Geometric Mean Accuracy: A Study on Imbalanced Data Classification Journal Article
In: arXiv, 2018.
Abstract | Links | BibTeX | Tags: Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour
@article{Kuncheva2018,
title = {Instance Selection Improves Geometric Mean Accuracy: A Study on Imbalanced Data Classification},
author = {Ludmila I Kuncheva and Álvar Arnaiz-González and José Francisco Díez-Pastor and Iain A D Gunn},
url = {https://arxiv.org/abs/1804.07155},
eprint = {1804.07155v1},
eprinttype = {arXiv},
year = {2018},
date = {2018-04-19},
journal = {arXiv},
abstract = {A natural way of handling imbalanced data is to attempt to equalise the class frequencies and train the classifier of choice on balanced data. For two-class imbalanced problems, the classification success is typically measured by the geometric mean (GM) of the true positive and true negative rates. Here we prove that GM can be improved upon by instance selection, and give the theoretical conditions for such an improvement. We demonstrate that GM is non-monotonic with respect to the number of retained instances, which discourages systematic instance selection. We also show that balancing the distribution frequencies is inferior to a direct maximisation of GM. To verify our theoretical findings, we carried out an experimental study of 12 instance selection methods for imbalanced data, using 66 standard benchmark data sets. The results reveal possible room for new instance selection methods for imbalanced data.},
keywords = {Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour},
pubstate = {published},
tppubtype = {article}
}
Gunn, Iain A D; Arnaiz-González, Álvar; Kuncheva, Ludmila I
A taxonomic look at instance-based stream classifiers Journal Article
In: Neurocomputing, vol. 286, pp. 167-178, 2018, ISSN: 0925-2312.
Abstract | Links | BibTeX | Tags: Concept drift, Instance selection, Machine learning, Prototype generation, Stream classification
@article{Gunn2018,
title = {A taxonomic look at instance-based stream classifiers},
author = {Iain A D Gunn and Álvar Arnaiz-González and Ludmila I Kuncheva},
url = {https://www.sciencedirect.com/science/article/pii/S092523121830095X},
doi = {10.1016/j.neucom.2018.01.062},
issn = {0925-2312},
year = {2018},
date = {2018-04-19},
journal = {Neurocomputing},
volume = {286},
pages = {167--178},
abstract = {Large numbers of data streams are today generated in many fields. A key challenge when learning from such streams is the problem of concept drift. Many methods, including many prototype methods, have been proposed in recent years to address this problem. This paper presents a refined taxonomy of instance selection and generation methods for the classification of data streams subject to concept drift. The taxonomy allows discrimination among a large number of methods which pre-existing taxonomies for offline instance selection methods did not distinguish. This makes possible a valuable new perspective on experimental results, and provides a framework for discussion of the concepts behind different algorithm-design approaches. We review a selection of modern algorithms for the purpose of illustrating the distinctions made by the taxonomy. We present the results of a numerical experiment which examined the performance of a number of representative methods on both synthetic and real-world data sets with and without concept drift, and discuss the implications for the directions of future research in light of the taxonomy. On the basis of the experimental results, we are able to give recommendations for the experimental evaluation of algorithms which may be proposed in the future.},
keywords = {Concept drift, Instance selection, Machine learning, Prototype generation, Stream classification},
pubstate = {published},
tppubtype = {article}
}
Grzenda, Maciej; Bustillo, Andres
Semi-supervised roughness prediction with partly unlabeled vibration data streams Journal Article
In: Journal of Intelligent Manufacturing, pp. 1-13, 2018, ISSN: 1572-8145.
Abstract | Links | BibTeX | Tags: Face milling, Roughness prediction, Semi-supervised techniques, Unlabeled data
@article{Grzenda2018,
title = {Semi-supervised roughness prediction with partly unlabeled vibration data streams},
author = {Maciej Grzenda and Andres Bustillo},
url = {https://doi.org/10.1007/s10845-018-1413-z},
doi = {10.1007/s10845-018-1413-z},
issn = {1572-8145},
year = {2018},
date = {2018-03-23},
journal = {Journal of Intelligent Manufacturing},
pages = {1--13},
abstract = {Experimental data sets that include tool settings, tool and machine-tool behavior, and surface roughness data for milling processes are usually of limited size, due mainly to the high costs of machining tests. This fact restricts the application of machine-learning techniques for surface roughness prediction in industrial settings. The primary objective of this work is to investigate the way data streams that are missing product features (i.e. unlabeled data streams) can contribute to the development of prediction models. The investigation is followed by a proposal for a semi-supervised approach to the development of roughness prediction models that can use partly unlabeled data to improve the accuracy of roughness prediction. Following this strategy, records collected during the milling process, which miss roughness measurements, but contain vibration data are used to increase the accuracy of the prediction models. The method proposed in this work is based on the selective use of such unlabelled instances, collected at tool settings that are not represented in the labeled data. This strategy, when applied properly, yields both extended training data sets and higher accuracy in the roughness prediction models that are derived from them. The scale of accuracy improvement and its statistical significance are shown in the study case of high-torque face milling of F114 steel. The semi-supervised approach proposed in this work has been used in combination with supervised k Nearest Neighbours and random forest techniques. Furthermore, the study of both continuous and discretized roughness prediction, showed higher gains in accuracy in the second.},
keywords = {Face milling, Roughness prediction, Semi-supervised techniques, Unlabeled data},
pubstate = {published},
tppubtype = {article}
}
Santos, Pedro; Maudes-Raedo, Jesús; Bustillo, Andrés
Identifying maximum imbalance in datasets for fault diagnosis of gearboxes Journal Article
In: Journal of Intelligent Manufacturing, vol. 29, no. 2, pp. 333-351, 2018, ISSN: 0956-5515.
Abstract | Links | BibTeX | Tags: Fault diagnosis, Metrics Gearbox, Multi-class imbalance, Wind turbines Ensembles
@article{Santos2018,
title = {Identifying maximum imbalance in datasets for fault diagnosis of gearboxes},
author = {Pedro Santos and Jesús Maudes-Raedo and Andrés Bustillo},
url = {https://link.springer.com/article/10.1007%2Fs10845-015-1110-0},
doi = {10.1007/s10845-015-1110-0},
issn = {0956-5515},
year = {2018},
date = {2018-02-01},
journal = {Journal of Intelligent Manufacturing},
volume = {29},
number = {2},
pages = {333--351},
abstract = {Research into fault diagnosis in rotating machinery with a wide range of variable loads and speeds, such as the gearboxes of wind turbines, is of great industrial interest. Although appropriate sensors have been identified, an intelligent system that classifies machine states remains an open issue, due to a paucity of datasets with sufficient fault cases. Many of the proposed solutions have been tested on balanced datasets, containing roughly equal percentages of wind-turbine failure instances and instances of correct performance. In practice, however, it is not possible to obtain balanced datasets under real operating conditions. Our objective is to identify the most suitable classification technique that will depend least of all on the level of imbalance in the dataset. We start by analysing different metrics for the comparison of classification techniques on imbalanced datasets. Our results pointed to the Unweighted Macro Average of the F-measure, which we consider the most suitable metric for this diagnosis. Then, an extensive set of classification techniques was tested on datasets with varying levels of imbalance. Our conclusion is that a Rotation Forest ensemble of C4.4 decision trees, modifying the training phase of the classifier with a cost-sensitive approach, is the most suitable prediction model for this industrial task. It maintained its good performance even when the minority classes rate was as low as 6.5 %, while the majority of the other classifiers were more sensitive to the level of database imbalance and failed standard performance objectives, when the minority classes rate was lower than 10.5 %.},
keywords = {Fault diagnosis, Metrics Gearbox, Multi-class imbalance, Wind turbines Ensembles},
pubstate = {published},
tppubtype = {article}
}
Güemes-Peña, Diego; López-Nozal, Carlos; Marticorena-Sánchez, Raúl; Maudes-Raedo, Jesús
Emerging topics in mining software repositories Journal Article
In: Progress in Artificial Intelligence, pp. 1-11, 2018, ISSN: 2192-6360.
Abstract | Links | BibTeX | Tags: Data Mining, Machine learning, Software engineering, Software process, Software repository
@article{Güemes-Peña2018,
title = {Emerging topics in mining software repositories},
author = {Diego Güemes-Peña and Carlos López-Nozal and Raúl Marticorena-Sánchez and Jesús Maudes-Raedo},
url = {https://link.springer.com/content/pdf/10.1007/s13748-018-0147-7.pdf},
doi = {10.1007/s13748-018-0147-7},
issn = {2192-6360},
year = {2018},
date = {2018-01-01},
journal = {Progress in Artificial Intelligence},
pages = {1--11},
abstract = {A software process is a set of related activities that culminates in the production of a software package: specification, design, implementation, testing, evolution into new versions, and maintenance. There are also other supporting activities such as configuration and change management, quality assurance, project management, evaluation of user experience, etc. Software repositories are infrastructures to support all these activities. They can be composed with several systems that include code change management, bug tracking, code review, build system, release binaries, wikis, forums, etc. This position paper on mining software repositories presents a review and a discussion of research in this field over the past decade. We also identify applied machine learning strategies, current working topics, and future challenges for the improvement of company decision-making systems. Machine learning is defined as the process of discovering patterns in data. It can be applied to software repositories, since every change is recorded as data. Companies can then use these patterns as the basis for their decision-making systems and for knowledge discovery.},
keywords = {Data Mining, Machine learning, Software engineering, Software process, Software repository},
pubstate = {published},
tppubtype = {article}
}
Mikolajczyk, Tadeusz; Fuwen, Hu; Moldovan, Liviu; Bustillo, Andres; Matuszewski, Maciej; Nowicki, Krzysztof
Selection of machining parameters with Android application made using MIT App Inventor bookmarks Journal Article
In: Procedia Manufacturing, vol. 22, pp. 172 - 179, 2018, ISSN: 2351-9789, (11th International Conference Interdisciplinarity in Engineering, INTER-ENG 2017, 5-6 October 2017, Tirgu Mures, Romania).
Abstract | Links | BibTeX | Tags: Android, machining parameters, MIT inventor, mobile application
@article{MIKOLAJCZYK2018172,
title = {Selection of machining parameters with {Android} application made using {MIT App Inventor} bookmarks},
author = {Tadeusz Mikolajczyk and Hu Fuwen and Liviu Moldovan and Andres Bustillo and Maciej Matuszewski and Krzysztof Nowicki},
url = {http://www.sciencedirect.com/science/article/pii/S2351978918303214},
doi = {10.1016/j.promfg.2018.03.027},
issn = {2351-9789},
year = {2018},
date = {2018-01-01},
journal = {Procedia Manufacturing},
volume = {22},
pages = {172--179},
abstract = {Undoubtedly mobile devices are gaining more and more popularity. However, the breakthrough in mobile applications is yet to be followed by a breakthrough in manufacturing industry. The paper presents a new methodology for application development on the Android platform in MIT App Inventor bookmarks. The research method consists in an algorithm and application design. Also it was presented an example of an elaborate program SpeedCalc for the lathe spindle speed selection for the determined value of cutting speed in relation to the diameter of the work piece. The program can be used with a mobile phone or tablet.},
note = {11th International Conference Interdisciplinarity in Engineering, INTER-ENG 2017, 5-6 October 2017, Tirgu Mures, Romania},
keywords = {Android, machining parameters, MIT inventor, mobile application},
pubstate = {published},
tppubtype = {article}
}
2017
Maudes, Jesus; Bustillo, Andrés; Guerra, Antonio J; Ciurana, Joaquim
Random Forest ensemble prediction of stent dimensions in microfabrication processes Journal Article
In: The International Journal of Advanced Manufacturing Technology, vol. 91, no. 1, pp. 879–893, 2017, ISSN: 1433-3015.
Abstract | Links | BibTeX | Tags: Data Mining, Ensembles of regressors, Random forest, Regression trees, Stents Laser machining
@article{Maudes2017,
title = {Random Forest ensemble prediction of stent dimensions in microfabrication processes},
author = {Jesus Maudes and Andrés Bustillo and Antonio J Guerra and Joaquim Ciurana},
url = {https://doi.org/10.1007/s00170-016-9695-9},
doi = {10.1007/s00170-016-9695-9},
issn = {1433-3015},
year = {2017},
date = {2017-07-01},
journal = {The International Journal of Advanced Manufacturing Technology},
volume = {91},
number = {1},
pages = {879--893},
abstract = {The recent development of new laser machine tools for the manufacture of micro-scale metallic components has boosted demand in the field of medical applications. However, the optimization of this process encounters a major problem: a knowledge gap concerning the relation between the controllable parameters of these machine tools and the quality of the machined components. Our research proposes a two-step strategy to approach this problem for the manufacture of stents. First, a screening test identifies good and bad performance conditions for the laser process and generates useful information on cutting performance; then, a stent is manufactured under different cutting conditions and the most accurate machine learning technique to model this process is identified. This strategy is validated with the performance of experiments that vary pulse duration, laser power, and cutting speed, and measure two geometrical characteristics of the stent geometry. The results showed that linear Support Vector Machines can identify good and bad cutting conditions, while Random Forest ensembles of regression trees can predict with high accuracy the two characteristics of the stent geometry under study. Besides, this technique can extract useful information from the screening test that improves its final accuracy. In view of the small dataset size, an alternative based on the leave-one-out technique was used, instead of standard cross validation, so as to assure the generalization capability of the models.},
keywords = {Data Mining, Ensembles of regressors, Random forest, Regression trees, Stents Laser machining},
pubstate = {published},
tppubtype = {article}
}
Kuncheva, Ludmila I; Rodríguez, Juan José; Jackson, Aaron S
Restricted set classification: Who is there? Journal Article
In: Pattern Recognition, vol. 63, pp. 158-170, 2017, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: Chess pieces classification, Compound decision problem, Object classification, Pattern recognition, Restricted set classification, SELECTED
@article{Kuncheva2017,
title = {Restricted set classification: Who is there?},
author = {Ludmila I Kuncheva and Juan José Rodríguez and Aaron S Jackson},
url = {https://www.sciencedirect.com/science/article/pii/S0031320316302412},
doi = {10.1016/j.patcog.2016.08.028},
issn = {0031-3203},
year = {2017},
date = {2017-03-01},
journal = {Pattern Recognition},
volume = {63},
pages = {158--170},
abstract = {We consider a problem where a set X of N objects (instances) coming from c classes have to be classified simultaneously. A restriction is imposed on X in that the maximum possible number of objects from each class is known, hence we dubbed the problem who-is-there? We compare three approaches to this problem: (1) independent classification whereby each object is labelled in the class with the largest posterior probability; (2) a greedy approach which enforces the restriction; and (3) a theoretical approach which, in addition, maximises the likelihood of the label assignment, implemented through the Hungarian assignment algorithm. Our experimental study consists of two parts. The first part includes a custom-made chess data set where the pieces on the chess board must be recognised together from an image of the board. In the second part, we simulate the restricted set classification scenario using 96 datasets from a recently collated repository (University of Santiago de Compostela, USC). Our results show that the proposed approach (3) outperforms approaches (1) and (2).},
keywords = {Chess pieces classification, Compound decision problem, Object classification, Pattern recognition, Restricted set classification, SELECTED},
pubstate = {published},
tppubtype = {article}
}
Sáiz-Manzanares, María Consuelo; Marticorena-Sánchez, Raúl; García-Osorio, César; Díez-Pastor, José Francisco
How Do B-Learning and Learning Patterns Influence Learning Outcomes? Journal Article
In: Frontiers in Psychology, vol. 8, pp. 745, 2017, ISSN: 1664-1078.
Abstract | Links | BibTeX | Tags: B-Learning, Computer Science teaching, eLearning, SELECTED
@article{10.3389/fpsyg.2017.00745,
  author    = {María Consuelo Sáiz-Manzanares and Raúl Marticorena-Sánchez and César García-Osorio and José Francisco Díez-Pastor},
  title     = {How Do B-Learning and Learning Patterns Influence Learning Outcomes?},
  journal   = {Frontiers in Psychology},
  volume    = {8},
  pages     = {745},
  year      = {2017},
  date      = {2017-01-01},
  issn      = {1664-1078},
  doi       = {10.3389/fpsyg.2017.00745},
  url       = {http://journal.frontiersin.org/article/10.3389/fpsyg.2017.00745},
  keywords  = {B-Learning, Computer Science teaching, eLearning, SELECTED},
  abstract  = {Learning Management System (LMS) platforms provide a wealth of information on the learning patterns of students. Learning Analytics (LA) techniques permit the analysis of the logs or records of the activities of both students and teachers on the on-line platform. The learning patterns differ depending on the type of Blended Learning (B-Learning). In this study, we analyse: 1) whether significant differences exist between the learning outcomes of students and their learning patterns on the platform, depending on the type of B-Learning [Replacement blend (RB) vs. Supplemental blend (SB)]; 2) whether a relation exists between the metacognitive and the motivational strategies of students, their learning outcomes and their learning patterns on the platform. The 87,065 log records of 129 students (69 in RB and 60 in SB) in the Moodle 3.1 platform were analysed. The results revealed different learning patterns between students depending on the type of B-Learning (RB vs. SB). We have found that the degree of blend, RB vs. SB, seems to condition student behaviour on the platform. Learning patterns in RB environments can predict student learning outcomes. Additionally, in RB environments there is a relationship between the learning patterns and the metacognitive and motivational strategies of the students.},
  pubstate  = {published},
  tppubtype = {article}
}
Rodríguez, Juan José; Quintana, Guillem; Bustillo, Andrés; Ciurana, Joaquim
A decision-making tool based on decision trees for roughness prediction in face milling Journal Article
In: International Journal of Computer Integrated Manufacturing, vol. 30, no. 9, 2017, ISSN: 0951-192X.
Abstract | Links | BibTeX | Tags: AI in manufacturing systems, cost management, decision support systems, Decision trees, process control, surface roughness, tool condition monitoring
@article{Rodríguez2017,
  author    = {Juan José Rodríguez and Guillem Quintana and Andrés Bustillo and Joaquim Ciurana},
  title     = {A decision-making tool based on decision trees for roughness prediction in face milling},
  journal   = {International Journal of Computer Integrated Manufacturing},
  volume    = {30},
  number    = {9},
  year      = {2017},
  date      = {2017-01-01},
  issn      = {0951-192X},
  doi       = {10.1080/0951192X.2016.1247991},
  url       = {https://www.tandfonline.com/doi/full/10.1080/0951192X.2016.1247991},
  keywords  = {AI in manufacturing systems, cost management, decision support systems, Decision trees, process control, surface roughness, tool condition monitoring},
  abstract  = {The selection of the right cutting tool in manufacturing process design is always an open question, especially when different tools are available on the market with similar characteristics, but marked differences in price, ranging from low-cost to high-performance cutting tools. The ultimate decision of the engineer will depend on previous experience with the life cycle of the tool and its performance, but without the support of a systematic knowledge base. This research presents a decision-making system based on soft-computing techniques. First, several experiments were carried out with four different cutting tools: two flat-milling low-cost tools without any surface treatment or coating and two high-performance, high-cost cutting tools (in both cases with four cutting edges, similar geometrical features and diameters). Three different measures of tool wear are considered in the context of real workshop conditions: on-line power consumption, cutting length and volume of cut material. Finally, decision trees have been selected as the most suitable technique for building a decision-making system for two reasons: these trees show higher accuracy for the prediction of roughness in terms of tool wear and tool type. They also provide useful visual feedback on the information that is extracted from the real data, which can be directly used by the process engineer.},
  pubstate  = {published},
  tppubtype = {article}
}
2016
Palasciano, Claudio; Bustillo, Andres; Fantini, Paola; Taisch, Marco
A new approach for machine's management: from machine's signal acquisition to energy indexes Journal Article
In: Journal of Cleaner Production, vol. 137, pp. 1503 - 1515, 2016, ISSN: 0959-6526.
Abstract | Links | BibTeX | Tags: Energy and resource efficient manufacturing, Energy efficiency KPIs, Energy efficient manufacturing modeling, Energy-aware machine control
@article{PALASCIANO20161503,
title = {A new approach for machine's management: from machine's signal acquisition to energy indexes},
author = {Claudio Palasciano and Andres Bustillo and Paola Fantini and Marco Taisch},
url = {http://www.sciencedirect.com/science/article/pii/S0959652616309180},
doi = {10.1016/j.jclepro.2016.07.030},
issn = {0959-6526},
year = {2016},
date = {2016-11-20},
journal = {Journal of Cleaner Production},
volume = {137},
pages = {1503--1515},
abstract = {In the highly competitive modern-day industrial landscape, characterized by globalization and resource scarcity, manufacturers are striving to improve economic and environmental performance. Innovation that enables self-adjustment, control and optimization of the energy consumption of individual machines continues. However, more research is needed if such systems are to be deployed successfully, especially considering the complex characteristics of the energy flows in the factory. In this paper we propose a novel approach to the coordination of information, processing and sensing systems for energy and resource efficient production systems. By leveraging on a recently-developed framework focusing on physical flows of energy, materials and waste we propose a solution based on specific energy efficiency KPIs and an online data acquisition/processing system, that enables real-time monitoring of the current status of the machining process and lagging assessment of system energy efficiency. The proposed solution allows the identification of abnormal energy consumption during the operational machine cycle, caused by incorrect part dimensioning or erroneous cutting conditions programmed by the process engineer, enabling identification of potential disruptions with different gravity levels, and delivery of meaningful alarms for the operator. Adaptive control of the machine cutting conditions or even trajectory re-programming is then possible, by correlating the energy-consumption data with other data, such as head temperature. Furthermore, by analysing the energy consumption of value and non value adding activities over complete production cycles (such as a shift or day), it is possible to monitor the progress of production systems toward achieving energy efficiency targets and to conduct root-cause analysis of inefficient energy usage for continuous improvement programs. 
We tested the proposed solution, modeling, index system ad online data acquisition/processing platform, through an industrial case study by deploying the developed hardware and software modules on a Nicolás Correa S.A. VERSA milling machine.},
keywords = {Energy and resource efficient manufacturing, Energy efficiency KPIs, Energy efficient manufacturing modeling, Energy-aware machine control},
pubstate = {published},
tppubtype = {article}
}
Bustillo, Andres; López de Lacalle, Luis N; Fernández-Valdivielso, Asier; Santos, Pedro
Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components Journal Article
In: Journal of Computational Design and Engineering, vol. 3, no. 4, pp. 337 - 348, 2016, ISSN: 2288-4300.
Abstract | Links | BibTeX | Tags: Ensembles, Forming taps, Regression trees, Roll taps, Roll-tap wear, Rotation forest, Threading
@article{BUSTILLO2016337,
title = {Data-mining modeling for the prediction of wear on forming-taps in the threading of steel components},
author = {Andres Bustillo and {López de Lacalle}, Luis N. and Asier Fernández-Valdivielso and Pedro Santos},
url = {http://www.sciencedirect.com/science/article/pii/S2288430016300306},
doi = {10.1016/j.jcde.2016.06.002},
issn = {2288-4300},
year = {2016},
date = {2016-10-01},
journal = {Journal of Computational Design and Engineering},
volume = {3},
number = {4},
pages = {337--348},
abstract = {An experimental approach is presented for the measurement of wear that is common in the threading of cold-forged steel. In this work, the first objective is to measure wear on various types of roll taps manufactured to tapping holes in microalloyed HR45 steel. Different geometries and levels of wear are tested and measured. Taking their geometry as the critical factor, the types of forming tap with the least wear and the best performance are identified. Abrasive wear was observed on the forming lobes. A higher number of lobes in the chamber zone and around the nominal diameter meant a more uniform load distribution and a more gradual forming process. A second objective is to identify the most accurate data-mining technique for the prediction of form-tap wear. Different data-mining techniques are tested to select the most accurate one: from standard versions such as Multilayer Perceptrons, Support Vector Machines and Regression Trees to the most recent ones such as Rotation Forest ensembles and Iterated Bagging ensembles. The best results were obtained with ensembles of Rotation Forest with unpruned Regression Trees as base regressors that reduced the RMS error of the best-tested baseline technique for the lower length output by 33%, and Additive Regression with unpruned M5P as base regressors that reduced the RMS errors of the linear fit for the upper and total lengths by 25% and 39%, respectively. However, the lower length was statistically more difficult to model in Additive Regression than in Rotation Forest. Rotation Forest with unpruned Regression Trees as base regressors therefore appeared to be the most suitable regressor for the modeling of this industrial problem.},
keywords = {Ensembles, Forming taps, Regression trees, Roll taps, Roll-tap wear, Rotation forest, Threading},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Blachnik, Marcin; Kordos, Mirosław; García-Osorio, César
Fusion of instance selection methods in regression tasks Journal Article
In: Information Fusion, vol. 30, pp. 69 - 79, 2016, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Data Mining, Ensemble methods, Instance selection, Regression, SELECTED
@article{ArnaizGonzalez201669,
title = {Fusion of instance selection methods in regression tasks},
author = {Álvar Arnaiz-González and Marcin Blachnik and Mirosław Kordos and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S1566253515001141},
doi = {10.1016/j.inffus.2015.12.002},
issn = {1566-2535},
year = {2016},
date = {2016-01-01},
journal = {Information Fusion},
volume = {30},
pages = {69--79},
abstract = {Data pre-processing is a very important aspect of data mining. In this paper we discuss instance selection used for prediction algorithms, which is one of the pre-processing approaches. The purpose of instance selection is to improve the data quality by data size reduction and noise elimination. Until recently, instance selection has been applied mainly to classification problems. Very few recent papers address instance selection for regression tasks. This paper proposes fusion of instance selection algorithms for regression tasks to improve the selection performance. As the members of the ensemble two different families of instance selection methods are evaluated: one based on distance threshold and the other one on converting the regression task into a multiple class classification task. Extensive experimental evaluation performed on the two regression versions of the Edited Nearest Neighbor (ENN) and Condensed Nearest Neighbor (CNN) methods showed that the best performance measured by the error value and data size reduction are in most cases obtained for the ensemble methods.},
keywords = {Data Mining, Ensemble methods, Instance selection, Regression, SELECTED},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection for regression by discretization Journal Article
In: Expert Systems With Applications, 2016, ISSN: 0957-4174.
Links | BibTeX | Tags: Data Mining, Instance selection, Regression
@article{ArnaizGonzalez201669b,
  author    = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
  title     = {Instance selection for regression by discretization},
  journal   = {Expert Systems With Applications},
  year      = {2016},
  date      = {2016-01-01},
  issn      = {0957-4174},
  doi       = {10.1016/j.eswa.2015.12.046},
  keywords  = {Data Mining, Instance selection, Regression},
  pubstate  = {published},
  tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection for regression: Adapting DROP Journal Article
In: Neurocomputing, vol. 201, pp. 66–81, 2016, ISSN: 0925-2312.
Abstract | Links | BibTeX | Tags: Data Mining, DROP, Instance selection, Noise filtering, Regression
@article{ArnaizGonzález2016,
title = {Instance selection for regression: Adapting {DROP}},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S0925231216301953},
doi = {10.1016/j.neucom.2016.04.003},
issn = {0925-2312},
year = {2016},
date = {2016-01-01},
journal = {Neurocomputing},
volume = {201},
pages = {66--81},
abstract = {Machine Learning has two central processes of interest that captivate the scientific community: classification and regression. Although instance selection for classification has shown its usefulness and has been researched in depth, instance selection for regression has not followed the same path and there are few published algorithms on the subject. In this paper, we propose that various adaptations of DROP, a well-known family of instance selection methods for classification, be applied to regression. Their behaviour is analysed using a broad range of datasets. The results are presented of the analysis of four new proposals for the reduction of dataset size, the effect on error when several classifiers are trained with the reduced dataset, and their robustness against noise. This last aspect is especially important, since in real life, it is frequent that the registered data be inexact and present distortions due to different causes: errors in the measurement tools, typos when writing results, existence of outliers and spurious readings, corruption in files, etc. When the datasets are small it is possible to manually correct these problems, but for big and huge datasets is better to have automatic methods to deal with these problems. In the experimental part, the proposed methods are found to be quite robust to noise.},
keywords = {Data Mining, DROP, Instance selection, Noise filtering, Regression},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection of linear complexity for big data Journal Article
In: Knowledge-Based Systems, vol. 107, pp. 83–95, 2016, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: Big data, Data Mining, Data reduction, Hashing, Instance selection, Nearest neighbors, SELECTED
@article{ArnaizGonzálezLSHIS2016,
title = {Instance selection of linear complexity for big data},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S0950705116301617},
doi = {10.1016/j.knosys.2016.05.056},
issn = {0950-7051},
year = {2016},
date = {2016-01-01},
journal = {Knowledge-Based Systems},
volume = {107},
pages = {83--95},
abstract = {Over recent decades, database sizes have grown considerably. Larger sizes present new challenges, because machine learning algorithms are not prepared to process such large volumes of information. Instance selection methods can alleviate this problem when the size of the data set is medium to large. However, even these methods face similar problems with very large-to-massive data sets. In this paper, two new algorithms with linear complexity for instance selection purposes are presented. Both algorithms use locality-sensitive hashing to find similarities between instances. While the complexity of conventional methods (usually quadratic, O(n²), or log-linear, O(n log n)) means that they are unable to process large-sized data sets, the new proposal shows competitive results in terms of accuracy. Even more remarkably, it shortens execution time, as the proposal manages to reduce complexity and make it linear with respect to the data set size. The new proposal has been compared with some of the best known instance selection methods for testing and has also been evaluated on large data sets (up to a million instances).},
keywords = {Big data, Data Mining, Data reduction, Hashing, Instance selection, Nearest neighbors, SELECTED},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Random feature weights for regression trees Journal Article
In: Progress in Artificial Intelligence, vol. 5, no. 2, pp. 91–103, 2016, ISSN: 2192-6360.
Abstract | Links | BibTeX | Tags: Data Mining, Ensemble methods, Regression
@article{Arnaiz-González2016,
title = {Random feature weights for regression trees},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
url = {http://dx.doi.org/10.1007/s13748-016-0081-5},
doi = {10.1007/s13748-016-0081-5},
issn = {2192-6360},
year = {2016},
date = {2016-01-01},
journal = {Progress in Artificial Intelligence},
volume = {5},
number = {2},
pages = {91--103},
abstract = {Ensembles are learning methods the operation of which relies on a combination of different base models. The diversity of ensembles is a fundamental aspect that conditions their operation. Random Feature Weights (RFW) was proposed as a classification-tree ensemble construction method in which diversity is introduced into each tree by means of a random weight associated with each attribute. These weights vary from one tree to another in the ensemble. In this article, the idea of RFW is adapted to decision-tree regression. A comparison is drawn with other ensemble construction methods: Bagging, Random Forest, Iterated Bagging, Random Subspaces and AdaBoost.R2 obtaining competitive results.},
keywords = {Data Mining, Ensemble methods, Regression},
pubstate = {published},
tppubtype = {article}
}
2015
Santos, Pedro; Villa, Luisa F; Reñones, Anibal; Bustillo, Andrés; Maudes-Raedo, Jesús
An SVM-Based Solution for Fault Detection in Wind Turbines Journal Article
In: Sensors, vol. 15, no. 3, pp. 5627–5648, 2015, ISSN: 1424-8220.
Abstract | Links | BibTeX | Tags: Fault diagnosis, Neural networks, Support vector machines, wind turbines
@article{Santos2015,
title = {An SVM-Based Solution for Fault Detection in Wind Turbines},
author = {Pedro Santos and Luisa F Villa and Anibal Reñones and Andrés Bustillo and Jesús Maudes-Raedo},
url = {http://www.mdpi.com/1424-8220/15/3/5627},
doi = {10.3390/s150305627},
issn = {1424-8220},
year = {2015},
date = {2015-03-09},
journal = {Sensors},
volume = {15},
number = {3},
pages = {5627--5648},
abstract = {Research into fault diagnosis in machines with a wide range of variable loads and speeds, such as wind turbines, is of great industrial interest. Analysis of the power signals emitted by wind turbines for the diagnosis of mechanical faults in their mechanical transmission chain is insufficient. A successful diagnosis requires the inclusion of accelerometers to evaluate vibrations. This work presents a multi-sensory system for fault diagnosis in wind turbines, combined with a data-mining solution for the classification of the operational state of the turbine. The selected sensors are accelerometers, in which vibration signals are processed using angular resampling techniques and electrical, torque and speed measurements. Support vector machines (SVMs) are selected for the classification task, including two traditional and two promising new kernels. This multi-sensory system has been validated on a test-bed that simulates the real conditions of wind turbines with two fault typologies: misalignment and imbalance. Comparison of SVM performance with the results of artificial neural networks (ANNs) shows that linear kernel SVM outperforms other kernels and ANNs in terms of accuracy, training and tuning times. The suitability and superior performance of linear SVM is also experimentally analyzed, to conclude that this data acquisition technique generates linearly separable datasets.},
keywords = {Fault diagnosis, Neural networks, Support vector machines, wind turbines},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Random Balance: Ensembles of variable priors classifiers for imbalanced data Journal Article
In: Knowledge-Based Systems, vol. 85, pp. 96–111, 2015, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SELECTED, SMOTE, Undersampling
@article{RandomBalance,
title = {Random Balance: Ensembles of variable priors classifiers for imbalanced data},
author = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
url = {http://www.sciencedirect.com/science/article/pii/S0950705115001720},
doi = {10.1016/j.knosys.2015.04.022},
issn = {0950-7051},
year = {2015},
date = {2015-01-01},
journal = {Knowledge-Based Systems},
volume = {85},
pages = {96--111},
abstract = {In Machine Learning, a data set is imbalanced when the class proportions are highly skewed. Class-imbalanced problems sets arise routinely in many application domains and pose a challenge to traditional classifiers. We propose a new approach to building ensembles of classifiers for two-class imbalanced data sets, called Random Balance. Each member of the Random Balance ensemble is trained with data sampled from the training set and augmented by artificial instances obtained using SMOTE. The novelty in the approach is that the proportions of the classes for each ensemble member are chosen randomly. The intuition behind the method is that the proposed diversity heuristic will ensure that the ensemble contains classifiers that are specialized for different operating points on the ROC space, thereby leading to larger AUC compared to other ensembles of classifiers. Experiments have been carried out to test the Random Balance approach by itself, and also in combination with standard ensemble methods. As a result, we propose a new ensemble creation method called RB-Boost which combines Random Balance with AdaBoost.M2. This combination involves enforcing random class proportions in addition to instance re-weighting. Experiments with 86 imbalanced data sets from two well known repositories demonstrate the advantage of the Random Balance approach.},
keywords = {AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SELECTED, SMOTE, Undersampling},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article
In: Information Sciences, vol. 325, pp. 98–117, 2015, ISSN: 0020-0255.
Abstract | Links | BibTeX | Tags: Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SELECTED, SMOTE, Undersampling
@article{DiezPastor201598,
title = {Diversity techniques improve the performance of the best imbalance learning ensembles},
author = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
url = {http://www.sciencedirect.com/science/article/pii/S0020025515005186},
doi = {10.1016/j.ins.2015.07.025},
issn = {0020-0255},
year = {2015},
date = {2015-01-01},
journal = {Information Sciences},
volume = {325},
pages = {98--117},
abstract = {Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques.},
keywords = {Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SELECTED, SMOTE, Undersampling},
pubstate = {published},
tppubtype = {article}
}
López, Carlos; Marticorena-Sánchez, Raúl; Díez-Pastor, José Francisco; García-Osorio, César
Acquisition of Transferable Skills Associated with Software Maintenance and Development Using Tools for Versioning and Task Management Journal Article
In: International Journal of Engineering Education, vol. 31, no. 3, pp. 839–850, 2015, ISSN: 0949-149X.
Abstract | BibTeX | Tags: Computer Science teaching, Organizational skills, Software configuration, Software engineering education, Transferable skills
@article{clopezno2015,
title = {Acquisition of Transferable Skills Associated with Software Maintenance and Development Using Tools for Versioning and Task Management},
author = {Carlos López and Raúl Marticorena-Sánchez and José Francisco Díez-Pastor and César García-Osorio},
issn = {0949-149X},
year = {2015},
date = {2015-01-01},
journal = {International Journal of Engineering Education},
volume = {31},
number = {3},
pages = {839--850},
abstract = {Tools for version control and task planning allow monitoring and collecting information on the software development and maintenance processes. This work describes the use of these types of tools in subject modules related to these fields. Instead of simply describing the tools as part of the subject content, the idea is to use them to promote and evaluate the acquisition of certain generic skills related to the subjects. After selecting the skills, this paper surveys the possible tools and their field of application at different levels of mastery, and concludes with an analysis of the impact of selected tools in the acquisition of those skills. This analysis was conducted through surveys of students from different courses in the knowledge area of software engineering.},
keywords = {Computer Science teaching, Organizational skills, Software configuration, Software engineering education, Transferable skills},
pubstate = {published},
tppubtype = {article}
}
2014
Santos, Pedro; Teixidor, Daniel; Maudes-Raedo, Jesús; Ciurana, Joaquim
Modelling Laser Milling of Microcavities for the Manufacturing of DES with Ensembles Journal Article
In: Journal of Applied Mathematics, vol. 2014, pp. 15, 2014, ISSN: 1110-757X.
Abstract | Links | BibTeX | Tags: Ensemble methods, Laser milling, Neural networks, Support vector machines
@article{Santos2014,
title = {Modelling Laser Milling of Microcavities for the Manufacturing of DES with Ensembles},
author = {Pedro Santos and Daniel Teixidor and Jesús Maudes-Raedo and Joaquim Ciurana},
url = {https://www.hindawi.com/journals/jam/2014/439091/},
doi = {10.1155/2014/439091},
issn = {1110-757X},
year = {2014},
date = {2014-04-17},
journal = {Journal of Applied Mathematics},
volume = {2014},
pages = {15},
abstract = {A set of designed experiments, involving the use of a pulsed Nd:YAG laser system milling 316L Stainless Steel, serve to study the laser-milling process of microcavities in the manufacture of drug-eluting stents (DES). Diameter, depth, and volume error are considered to be optimized as functions of the process parameters, which include laser intensity, pulse frequency, and scanning speed. Two different DES shapes are studied that combine semispheres and cylinders. Process inputs and outputs are defined by considering the process parameters that can be changed under industrial conditions and the industrial requirements of this manufacturing process. In total, 162 different conditions are tested in a process that is modeled with the following state-of-the-art data-mining regression techniques: Support Vector Regression, Ensembles, Artificial Neural Networks, Linear Regression, and Nearest Neighbor Regression. Ensemble regression emerged as the most suitable technique for studying this industrial problem. Specifically, Iterated Bagging ensembles with unpruned model trees outperformed the other methods in the tests. This method can predict the geometrical dimensions of the machined microcavities with relative errors related to the main average value in the range of 3 to 23%, which are considered very accurate predictions, in view of the characteristics of this innovative industrial task.},
keywords = {Ensemble methods, Laser milling, Neural networks, Support vector machines},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Tree ensemble construction using a GRASP-based heuristic and annealed randomness Journal Article
In: Information Fusion, vol. 20, pp. 189–202, 2014, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metahuristic, Random forest
@article{DiezPastor2014,
title = {Tree ensemble construction using a GRASP-based heuristic and annealed randomness},
author = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
url = {http://www.sciencedirect.com/science/article/pii/S1566253514000141},
doi = {10.1016/j.inffus.2014.01.009},
issn = {1566-2535},
year = {2014},
date = {2014-01-01},
journal = {Information Fusion},
volume = {20},
pages = {189--202},
abstract = {Two new methods for tree ensemble construction are presented: G-Forest and GAR-Forest. In a similar way to Random Forest, the tree construction process entails a degree of randomness. The same strategy used in the GRASP metaheuristic for generating random and adaptive solutions is used at each node of the trees. The source of diversity of the ensemble is the randomness of the solution generation method of GRASP. A further key feature of the tree construction method for GAR-Forest is a decreasing level of randomness during the process of constructing the tree: maximum randomness at the root and minimum randomness at the leaves. The method is therefore named ``GAR'', GRASP with annealed randomness. The results conclusively demonstrate that G-Forest and GAR-Forest outperform Bagging, AdaBoost, MultiBoost, Random Forest and Random Subspaces. The results are even more convincing in the presence of noise, demonstrating the robustness of the method. The relationship between base classifier accuracy and their diversity is analysed by application of kappa-error diagrams and a variant of these called kappa-error relative movement diagrams.},
keywords = {Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metahuristic, Random forest},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Arnaiz-González, Álvar; García-Osorio, César; Rodríguez, Juan José
Segmentación de defectos en piezas de fundido usando umbrales adaptativos y ensembles Proceedings Article
In: XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014, pp. 345–350, Zaragoza, Spain, 2014, ISBN: 978-84-15688-76-1.
BibTeX | Tags: Applied Machine Learning, Business intelligence, Data Mining
@inproceedings{ESTYLF2014a,
title = {Segmentación de defectos en piezas de fundido usando umbrales adaptativos y ensembles},
author = {José Francisco Díez-Pastor and Álvar Arnaiz-González and César García-Osorio and Juan José Rodríguez},
isbn = {978-84-15688-76-1},
year = {2014},
date = {2014-01-01},
booktitle = {XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014},
pages = {345--350},
address = {Zaragoza, Spain},
keywords = {Applied Machine Learning, Business intelligence, Data Mining},
pubstate = {published},
tppubtype = {inproceedings}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Selección de instancias en regresión mediante discretización Proceedings Article
In: XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014, pp. 351–356, Zaragoza, Spain, 2014, ISBN: 978-84-15688-76-1.
BibTeX | Tags: Data Mining, Instance selection
@inproceedings{ESTYLF2014b,
title = {Selección de instancias en regresión mediante discretización},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
isbn = {978-84-15688-76-1},
year = {2014},
date = {2014-01-01},
booktitle = {XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014},
pages = {351--356},
address = {Zaragoza, Spain},
keywords = {Data Mining, Instance selection},
pubstate = {published},
tppubtype = {inproceedings}
}