2019
Kuncheva, Ludmila I; Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Gunn, Iain A D
Instance selection improves geometric mean accuracy: a study on imbalanced data classification Journal Article
In: Progress in Artificial Intelligence, vol. 8, no. 2, pp. 215-228, 2019, ISSN: 2192-6352.
Abstract | Links | BibTeX | Tags: Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour, Theoretical perspective
@article{Kuncheva2019,
title = {Instance selection improves geometric mean accuracy: a study on imbalanced data classification},
author = {Ludmila I Kuncheva and Álvar Arnaiz-González and José Francisco Díez-Pastor and Iain A D Gunn},
url = {https://link.springer.com/article/10.1007/s13748-019-00172-4},
doi = {10.1007/s13748-019-00172-4},
issn = {2192-6352},
year = {2019},
date = {2019-06-01},
journal = {Progress in Artificial Intelligence},
volume = {8},
number = {2},
pages = {215--228},
abstract = {A natural way of handling imbalanced data is to attempt to equalise the class frequencies and train the classifier of choice on balanced data. For two-class imbalanced problems, the classification success is typically measured by the geometric mean (GM) of the true positive and true negative rates. Here we prove that GM can be improved upon by instance selection, and give the theoretical conditions for such an improvement. We demonstrate that GM is non-monotonic with respect to the number of retained instances, which discourages systematic instance selection. We also show that balancing the distribution frequencies is inferior to a direct maximisation of GM. To verify our theoretical findings, we carried out an experimental study of 12 instance selection methods for imbalanced data, using 66 standard benchmark data sets. The results reveal possible room for new instance selection methods for imbalanced data.},
keywords = {Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour, Theoretical perspective},
pubstate = {published},
tppubtype = {article}
}
Faithfull, William J; Rodríguez, Juan José; Kuncheva, Ludmila I
Combining univariate approaches for ensemble change detection in multivariate data Journal Article
In: Information Fusion, vol. 45, pp. 202-214, 2019, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Change detection, Ensemble methods, Multivariate data
@article{Faithfull2019,
title = {Combining univariate approaches for ensemble change detection in multivariate data},
author = {William J Faithfull and Juan José Rodríguez and Ludmila I Kuncheva},
url = {https://www.sciencedirect.com/science/article/pii/S1566253517301239},
doi = {10.1016/j.inffus.2018.02.003},
issn = {1566-2535},
year = {2019},
date = {2019-01-01},
journal = {Information Fusion},
volume = {45},
pages = {202--214},
abstract = {Detecting change in multivariate data is a challenging problem, especially when class labels are not available. There is a large body of research on univariate change detection, notably in control charts developed originally for engineering applications. We evaluate univariate change detection approaches —including those in the MOA framework — built into ensembles where each member observes a feature in the input space of an unsupervised change detection problem. We present a comparison between the ensemble combinations and three established ‘pure’ multivariate approaches over 96 data sets, and a case study on the KDD Cup 1999 network intrusion detection dataset. We found that ensemble combination of univariate methods consistently outperformed multivariate methods on the four experimental metrics.},
keywords = {Change detection, Ensemble methods, Multivariate data},
pubstate = {published},
tppubtype = {article}
}
2018
Kuncheva, Ludmila I; Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Gunn, Iain A D
Instance Selection Improves Geometric Mean Accuracy: A Study on Imbalanced Data Classification Journal Article
In: arXiv, 2018.
Abstract | Links | BibTeX | Tags: Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour
@article{Kuncheva2018,
title = {Instance Selection Improves Geometric Mean Accuracy: A Study on Imbalanced Data Classification},
author = {Ludmila I Kuncheva and Álvar Arnaiz-González and José Francisco Díez-Pastor and Iain A D Gunn},
url = {https://arxiv.org/abs/1804.07155},
eprint = {1804.07155},
archiveprefix = {arXiv},
year = {2018},
date = {2018-04-19},
journal = {arXiv},
abstract = {A natural way of handling imbalanced data is to attempt to equalise the class frequencies and train the classifier of choice on balanced data. For two-class imbalanced problems, the classification success is typically measured by the geometric mean (GM) of the true positive and true negative rates. Here we prove that GM can be improved upon by instance selection, and give the theoretical conditions for such an improvement. We demonstrate that GM is non-monotonic with respect to the number of retained instances, which discourages systematic instance selection. We also show that balancing the distribution frequencies is inferior to a direct maximisation of GM. To verify our theoretical findings, we carried out an experimental study of 12 instance selection methods for imbalanced data, using 66 standard benchmark data sets. The results reveal possible room for new instance selection methods for imbalanced data.},
keywords = {Ensemble methods, geometric mean (GM), Imbalanced data, instance/prototype selection, nearest neighbour},
pubstate = {published},
tppubtype = {article}
}
2016
Arnaiz-González, Álvar; Blachnik, Marcin; Kordos, Mirosław; García-Osorio, César
Fusion of instance selection methods in regression tasks Journal Article
In: Information Fusion, vol. 30, pp. 69 - 79, 2016, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Data Mining, Ensemble methods, Instance selection, Regression
@article{ArnaizGonzalez201669,
title = {Fusion of instance selection methods in regression tasks},
author = {Álvar Arnaiz-González and Marcin Blachnik and Mirosław Kordos and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S1566253515001141},
doi = {10.1016/j.inffus.2015.12.002},
issn = {1566-2535},
year = {2016},
date = {2016-01-01},
journal = {Information Fusion},
volume = {30},
pages = {69--79},
abstract = {Data pre-processing is a very important aspect of data mining. In this paper we discuss instance selection used for prediction algorithms, which is one of the pre-processing approaches. The purpose of instance selection is to improve the data quality by data size reduction and noise elimination. Until recently, instance selection has been applied mainly to classification problems. Very few recent papers address instance selection for regression tasks. This paper proposes fusion of instance selection algorithms for regression tasks to improve the selection performance. As the members of the ensemble two different families of instance selection methods are evaluated: one based on distance threshold and the other one on converting the regression task into a multiple class classification task. Extensive experimental evaluation performed on the two regression versions of the Edited Nearest Neighbor (ENN) and Condensed Nearest Neighbor (CNN) methods showed that the best performance measured by the error value and data size reduction are in most cases obtained for the ensemble methods.},
keywords = {Data Mining, Ensemble methods, Instance selection, Regression},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Random feature weights for regression trees Journal Article
In: Progress in Artificial Intelligence, vol. 5, no. 2, pp. 91–103, 2016, ISSN: 2192-6360.
Abstract | Links | BibTeX | Tags: Data Mining, Ensemble methods, Regression
@article{Arnaiz-González2016,
title = {Random feature weights for regression trees},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
doi = {10.1007/s13748-016-0081-5},
issn = {2192-6360},
year = {2016},
date = {2016-01-01},
journal = {Progress in Artificial Intelligence},
volume = {5},
number = {2},
pages = {91--103},
abstract = {Ensembles are learning methods the operation of which relies on a combination of different base models. The diversity of ensembles is a fundamental aspect that conditions their operation. Random Feature Weights RFW was proposed as a classification-tree ensemble construction method in which diversity is introduced into each tree by means of a random weight associated with each attribute. These weights vary from one tree to another in the ensemble. In this article, the idea of RFW is adapted to decision-tree regression. A comparison is drawn with other ensemble construction methods: Bagging, Random Forest, Iterated Bagging, Random Subspaces and AdaBoost.R2 obtaining competitive results.},
keywords = {Data Mining, Ensemble methods, Regression},
pubstate = {published},
tppubtype = {article}
}
2015
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Random Balance: Ensembles of variable priors classifiers for imbalanced data Journal Article
In: Knowledge-Based Systems, vol. 85, pp. 96-111, 2015, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SMOTE, Undersampling
@article{RandomBalance,
title = {Random Balance: Ensembles of variable priors classifiers for imbalanced data},
author = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
url = {http://www.sciencedirect.com/science/article/pii/S0950705115001720},
doi = {10.1016/j.knosys.2015.04.022},
issn = {0950-7051},
year = {2015},
date = {2015-01-01},
journal = {Knowledge-Based Systems},
volume = {85},
pages = {96--111},
abstract = {In Machine Learning, a data set is imbalanced when the class proportions are highly skewed. Class-imbalanced problems sets arise routinely in many application domains and pose a challenge to traditional classifiers. We propose a new approach to building ensembles of classifiers for two-class imbalanced data sets, called Random Balance. Each member of the Random Balance ensemble is trained with data sampled from the training set and augmented by artificial instances obtained using SMOTE. The novelty in the approach is that the proportions of the classes for each ensemble member are chosen randomly. The intuition behind the method is that the proposed diversity heuristic will ensure that the ensemble contains classifiers that are specialized for different operating points on the ROC space, thereby leading to larger AUC compared to other ensembles of classifiers. Experiments have been carried out to test the Random Balance approach by itself, and also in combination with standard ensemble methods. As a result, we propose a new ensemble creation method called RB-Boost which combines Random Balance with AdaBoost.M2. This combination involves enforcing random class proportions in addition to instance re-weighting. Experiments with 86 imbalanced data sets from two well known repositories demonstrate the advantage of the Random Balance approach.},
keywords = {AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SMOTE, Undersampling},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article
In: Information Sciences, vol. 325, pp. 98 - 117, 2015, ISSN: 0020-0255.
Abstract | Links | BibTeX | Tags: Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling
@article{DiezPastor201598,
title = {Diversity techniques improve the performance of the best imbalance learning ensembles},
author = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
url = {http://www.sciencedirect.com/science/article/pii/S0020025515005186},
doi = {10.1016/j.ins.2015.07.025},
issn = {0020-0255},
year = {2015},
date = {2015-01-01},
journal = {Information Sciences},
volume = {325},
pages = {98--117},
abstract = {Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques.},
keywords = {Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling},
pubstate = {published},
tppubtype = {article}
}
2014
Santos, Pedro; Teixidor, Daniel; Maudes-Raedo, Jesús; Ciurana, Joaquim
Modelling Laser Milling of Microcavities for the Manufacturing of DES with Ensembles Journal Article
In: Journal of Applied Mathematics, vol. 2014, pp. 15, 2014, ISSN: 1110-757X.
Abstract | Links | BibTeX | Tags: Ensemble methods, Laser milling, Neural networks, Support vector machines
@article{Santos2014,
title = {Modelling Laser Milling of Microcavities for the Manufacturing of {DES} with Ensembles},
author = {Pedro Santos and Daniel Teixidor and Jesús Maudes-Raedo and Joaquim Ciurana},
url = {https://www.hindawi.com/journals/jam/2014/439091/},
doi = {10.1155/2014/439091},
issn = {1110-757X},
year = {2014},
date = {2014-04-17},
journal = {Journal of Applied Mathematics},
volume = {2014},
pages = {15},
abstract = {A set of designed experiments, involving the use of a pulsed Nd:YAG laser system milling 316L Stainless Steel, serve to study the laser-milling process of microcavities in the manufacture of drug-eluting stents (DES). Diameter, depth, and volume error are considered to be optimized as functions of the process parameters, which include laser intensity, pulse frequency, and scanning speed. Two different DES shapes are studied that combine semispheres and cylinders. Process inputs and outputs are defined by considering the process parameters that can be changed under industrial conditions and the industrial requirements of this manufacturing process. In total, 162 different conditions are tested in a process that is modeled with the following state-of-the-art data-mining regression techniques: Support Vector Regression, Ensembles, Artificial Neural Networks, Linear Regression, and Nearest Neighbor Regression. Ensemble regression emerged as the most suitable technique for studying this industrial problem. Specifically, Iterated Bagging ensembles with unpruned model trees outperformed the other methods in the tests. This method can predict the geometrical dimensions of the machined microcavities with relative errors related to the main average value in the range of 3 to 23%, which are considered very accurate predictions, in view of the characteristics of this innovative industrial task.},
keywords = {Ensemble methods, Laser milling, Neural networks, Support vector machines},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Tree ensemble construction using a GRASP-based heuristic and annealed randomness Journal Article
In: Information Fusion, vol. 20, no. 0, pp. 189–202, 2014, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metaheuristic, Random forest
@article{DiezPastor2014,
title = {Tree ensemble construction using a {GRASP}-based heuristic and annealed randomness},
author = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
url = {http://www.sciencedirect.com/science/article/pii/S1566253514000141},
doi = {10.1016/j.inffus.2014.01.009},
issn = {1566-2535},
year = {2014},
date = {2014-01-01},
journal = {Information Fusion},
volume = {20},
pages = {189--202},
abstract = {Two new methods for tree ensemble construction are presented: G-Forest and GAR-Forest. In a similar way to Random Forest, the tree construction process entails a degree of randomness. The same strategy used in the GRASP metaheuristic for generating random and adaptive solutions is used at each node of the trees. The source of diversity of the ensemble is the randomness of the solution generation method of GRASP. A further key feature of the tree construction method for GAR-Forest is a decreasing level of randomness during the process of constructing the tree: maximum randomness at the root and minimum randomness at the leaves. The method is therefore named ``GAR'', GRASP with annealed randomness. The results conclusively demonstrate that G-Forest and GAR-Forest outperform Bagging, AdaBoost, MultiBoost, Random Forest and Random Subspaces. The results are even more convincing in the presence of noise, demonstrating the robustness of the method. The relationship between base classifier accuracy and their diversity is analysed by application of kappa-error diagrams and a variant of these called kappa-error relative movement diagrams.},
keywords = {Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metaheuristic, Random forest},
pubstate = {published},
tppubtype = {article}
}
2013
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Random Oracle Ensembles for Imbalanced Data Inproceedings
In: Zhou, Zhi-Hua; Roli, Fabio; Kittler, Josef (Ed.): 11th International Workshop on Multiple Classifier Systems, MCS 2013, pp. 247-258, Nanjing, China, 2013, ISBN: 978-3-642-38066-2.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Ensemble methods, Random oracles
@inproceedings{mcsRodriguezDG13,
title = {Random Oracle Ensembles for Imbalanced Data},
author = {Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
editor = {Zhi-Hua Zhou and Fabio Roli and Josef Kittler},
doi = {10.1007/978-3-642-38067-9_22},
isbn = {978-3-642-38066-2},
year = {2013},
date = {2013-01-01},
booktitle = {11th International Workshop on Multiple Classifier Systems, MCS 2013},
pages = {247--258},
address = {Nanjing, China},
crossref = {mcs2013},
keywords = {Class-imbalanced problems, Data Mining, Ensemble methods, Random oracles},
pubstate = {published},
tppubtype = {inproceedings}
}
2012
García-Pedrajas, Nicolás; Maudes-Raedo, Jesús; García-Osorio, César; Rodríguez, Juan José
Supervised subspace projections for constructing ensembles of classifiers Journal Article
In: Information Sciences, vol. 193, pp. 1–21, 2012, ISSN: 0020-0255.
Links | BibTeX | Tags: Classification, Ensemble methods, Subspace methods, Supervised projections
@article{subespacios2012,
title = {Supervised subspace projections for constructing ensembles of classifiers},
author = {Nicolás García-Pedrajas and Jesús Maudes-Raedo and César García-Osorio and Juan José Rodríguez},
url = {http://www.sciencedirect.com/science/article/pii/S0020025511003306},
doi = {10.1016/j.ins.2011.06.023},
issn = {0020-0255},
year = {2012},
date = {2012-06-01},
journal = {Information Sciences},
volume = {193},
pages = {1--21},
publisher = {Elsevier},
keywords = {Classification, Ensemble methods, Subspace methods, Supervised projections},
pubstate = {published},
tppubtype = {article}
}
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; García-Pedrajas, Nicolás
Random Feature Weights for Decision Tree Ensemble Construction Journal Article
In: Information Fusion, vol. 13, no. 1, pp. 20-30, 2012, ISSN: 1566-2535.
Links | BibTeX | Tags: Bagging, Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, Random forest
@article{RFW2012,
title = {Random Feature Weights for Decision Tree Ensemble Construction},
author = {Jesús Maudes and Juan José Rodríguez and César García-Osorio and Nicolás García-Pedrajas},
doi = {10.1016/j.inffus.2010.11.004},
issn = {1566-2535},
year = {2012},
date = {2012-01-01},
journal = {Information Fusion},
volume = {13},
number = {1},
pages = {20--30},
keywords = {Bagging, Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, Random forest},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Bustillo, Andrés; Quintana, Guillem; García-Osorio, César
Boosting Projections to improve surface roughness prediction in high-torque milling operations Journal Article
In: Soft Computing, vol. 16, no. 8, pp. 1427-1437, 2012, ISSN: 1432-7643 (Print) 1433-7479 (Online).
Links | BibTeX | Tags: Applied Machine Learning, Business intelligence, Data Mining, Ensemble methods
@article{BPforIndustrialData2012,
title = {Boosting Projections to improve surface roughness prediction in high-torque milling operations},
author = {José Francisco Díez-Pastor and Andrés Bustillo and Guillem Quintana and César García-Osorio},
doi = {10.1007/s00500-012-0846-0},
issn = {1432-7643},
year = {2012},
date = {2012-01-01},
journal = {Soft Computing},
volume = {16},
number = {8},
pages = {1427--1437},
keywords = {Applied Machine Learning, Business intelligence, Data Mining, Ensemble methods},
pubstate = {published},
tppubtype = {article}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; Maudes, Jesús; García-Osorio, César
Disturbing Neighbors Ensembles of Trees for Imbalanced Data Inproceedings
In: Wani, Arif M; Khoshgoftaar, Taghi; Zhu, Xingquan (Hill); Seliya, Naeem (Ed.): 11th International Conference on Machine Learning and Applications, ICMLA 2012, pp. 83-88, IEEE, Boca Raton, USA, 2012, ISBN: 978-0-7695-4913-2.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods
@inproceedings{RDMG12,
title = {Disturbing Neighbors Ensembles of Trees for Imbalanced Data},
author = {Juan José Rodríguez and José Francisco Díez-Pastor and Jesús Maudes and César García-Osorio},
editor = {Arif M Wani and Taghi Khoshgoftaar and Xingquan (Hill) Zhu and Naeem Seliya},
doi = {10.1109/ICMLA.2012.181},
isbn = {978-0-7695-4913-2},
year = {2012},
date = {2012-01-01},
booktitle = {11th International Conference on Machine Learning and Applications, ICMLA 2012},
volume = {2},
pages = {83--88},
publisher = {IEEE},
address = {Boca Raton, USA},
keywords = {Class-imbalanced problems, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods},
pubstate = {published},
tppubtype = {inproceedings}
}
2011
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; Pardo, Carlos
Random projections for linear SVM ensembles Journal Article
In: Applied Intelligence, vol. 34, pp. 347-359, 2011, ISSN: 0924-669X, 1573-7497.
Links | BibTeX | Tags: Data Mining, Ensemble methods, Support vector machines
@article{RandomProjectionsLinearSVMs,
title = {Random projections for linear SVM ensembles},
author = {Jesús Maudes and Juan José Rodríguez and César García-Osorio and Carlos Pardo},
doi = {10.1007/s10489-011-0283-2},
issn = {0924-669X, 1573-7497},
year = {2011},
date = {2011-01-01},
journal = {Applied Intelligence},
volume = {34},
pages = {347--359},
publisher = {Springer Netherlands},
keywords = {Data Mining, Ensemble methods, Support vector machines},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José; Bustillo, Andrés
GRASP Forest: A New Ensemble Method for Trees Inproceedings
In: Sansone, Carlo; Kittler, Josef; Roli, Fabio (Ed.): 10th International Workshop on Multiple Classifier Systems, MCS 2011, pp. 66-75, Springer-Verlag, Naples, Italy, 2011, ISSN: 0302-9743.
Links | BibTeX | Tags: Data Mining, Decision trees, Ensemble methods
@inproceedings{Diez-Pastor2011,
title = {{GRASP} Forest: A New Ensemble Method for Trees},
author = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez and Andrés Bustillo},
editor = {Carlo Sansone and Josef Kittler and Fabio Roli},
doi = {10.1007/978-3-642-21557-5_9},
issn = {0302-9743},
year = {2011},
date = {2011-01-01},
booktitle = {10th International Workshop on Multiple Classifier Systems, MCS 2011},
volume = {6713},
pages = {66--75},
publisher = {Springer-Verlag},
address = {Naples, Italy},
series = {Lecture Notes in Computer Science},
keywords = {Data Mining, Decision trees, Ensemble methods},
pubstate = {published},
tppubtype = {inproceedings}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Ensembles of Decision Trees for Imbalanced Data Inproceedings
In: Sansone, Carlo; Kittler, Josef; Roli, Fabio (Ed.): 10th International Workshop on Multiple Classifier Systems, MCS 2011, pp. 76-85, Springer-Verlag, Naples, Italy, 2011, ISSN: 0302-9743.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods
@inproceedings{Rodriguez2011,
title = {Ensembles of Decision Trees for Imbalanced Data},
author = {Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
editor = {Carlo Sansone and Josef Kittler and Fabio Roli},
doi = {10.1007/978-3-642-21557-5_10},
issn = {0302-9743},
year = {2011},
date = {2011-01-01},
booktitle = {10th International Workshop on Multiple Classifier Systems, MCS 2011},
volume = {6713},
pages = {76--85},
publisher = {Springer-Verlag},
address = {Naples, Italy},
series = {Lecture Notes in Computer Science},
keywords = {Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods},
pubstate = {published},
tppubtype = {inproceedings}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César; Santos, Pedro
Using Model Trees and their Ensembles for Imbalanced Data Inproceedings
In: Lozano, Jose A; Gámez, José A; Moreno, José A (Ed.): Advances in Artificial Intelligence: 14th Conference of the Spanish Association for Artificial Intelligence, CAEPIA 2011, pp. 94–103, Springer, La Laguna, Spain, 2011, ISBN: 978-3-642-25273-0.
BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods
@inproceedings{RDGS11,
  title     = {Using Model Trees and their Ensembles for Imbalanced Data},
  author    = {Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio and Pedro Santos},
  editor    = {Jose A Lozano and José A Gámez and José A Moreno},
  booktitle = {Advances in Artificial Intelligence: 14th Conference of the Spanish Association for Artificial Intelligence, CAEPIA 2011},
  series    = {Lecture Notes in Computer Science},
  volume    = {7023},
  pages     = {94--103},
  publisher = {Springer},
  address   = {La Laguna, Spain},
  isbn      = {978-3-642-25273-0},
  year      = {2011},
  date      = {2011-01-01},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2010
Rodríguez, Juan José; García-Osorio, César; Maudes, Jesús; Díez-Pastor, José Francisco
An Experimental Study on Ensembles of Functional Trees Inproceedings
In: Gayar, Neamat El; Kittler, Josef; Roli, Fabio (Ed.): 9th International Workshop on Multiple Classifier Systems, MCS 2010, pp. 64-73, Cairo, Egypt, 2010, ISBN: 978-3-642-12126-5.
Links | BibTeX | Tags: Data Mining, Decision trees, Ensemble methods
@inproceedings{RGMD10,
title = {An Experimental Study on Ensembles of Functional Trees},
author = {Juan José Rodríguez and César García-Osorio and Jesús Maudes and José Francisco Díez-Pastor},
editor = {Neamat El Gayar and Josef Kittler and Fabio Roli},
doi = {10.1007/978-3-642-12127-2_7},
isbn = {978-3-642-12126-5},
year = {2010},
date = {2010-01-01},
booktitle = {9th International Workshop on Multiple Classifier Systems, MCS 2010},
volume = {5997},
pages = {64--73},
address = {Cairo, Egypt},
series = {Lecture Notes in Computer Science},
keywords = {Data Mining, Decision trees, Ensemble methods},
pubstate = {published},
tppubtype = {inproceedings}
}
Rodríguez, Juan José; García-Osorio, César; Maudes, Jesús
Forests of Nested Dichotomies Journal Article
In: Pattern Recognition Letters, vol. 31, no. 2, pp. 125-132, 2010, ISSN: 0167-8655.
Links | BibTeX | Tags: Data Mining, Decision trees, Ensemble methods
@article{RGM10,
title = {Forests of Nested Dichotomies},
author = {Juan José Rodríguez and César García-Osorio and Jesús Maudes},
doi = {10.1016/j.patrec.2009.09.015},
issn = {0167-8655},
year = {2010},
date = {2010-01-01},
journal = {Pattern Recognition Letters},
volume = {31},
number = {2},
pages = {125--132},
keywords = {Data Mining, Decision trees, Ensemble methods},
pubstate = {published},
tppubtype = {article}
}
Pardo, Carlos; Rodríguez, Juan José; García-Osorio, César; Maudes, Jesús
An Empirical Study of Multilayer Perceptron Ensembles for Regression Tasks Inproceedings
In: García-Pedrajas, Nicolás; Herrera, Francisco; Fyfe, Colin; Benítez, José Manuel; Ali, Moonis (Ed.): Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010, pp. 106–115, Springer, Córdoba, Spain, 2010, ISBN: 978-3-642-13024-3.
BibTeX | Tags: Data Mining, Ensemble methods, Neural networks, Regression
@inproceedings{PRGM10,
  title     = {An Empirical Study of Multilayer Perceptron Ensembles for Regression Tasks},
  author    = {Carlos Pardo and Juan José Rodríguez and César García-Osorio and Jesús Maudes},
  editor    = {Nicolás García-Pedrajas and Francisco Herrera and Colin Fyfe and José Manuel Benítez and Moonis Ali},
  booktitle = {Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010},
  series    = {Lecture Notes in Computer Science},
  volume    = {6097},
  pages     = {106--115},
  publisher = {Springer},
  address   = {Córdoba, Spain},
  isbn      = {978-3-642-13024-3},
  year      = {2010},
  date      = {2010-01-01},
  keywords  = {Data Mining, Ensemble methods, Neural networks, Regression},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2009
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Disturbing Neighbors Ensembles for Linear SVM Inproceedings
In: Benediktsson, Jon Atli; Kittler, Josef; Roli, Fabio (Ed.): 8th International Workshop on Multiple Classifier Systems, MCS 2009, pp. 191–200, Springer-Verlag, Reykjavik, Iceland, 2009, ISBN: 978-3-642-02325-5.
Links | BibTeX | Tags: Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods, Support vector machines
@inproceedings{MRG09a,
  title     = {Disturbing Neighbors Ensembles for Linear SVM},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio},
  editor    = {Jon Atli Benediktsson and Josef Kittler and Fabio Roli},
  doi       = {10.1007/978-3-642-02326-2_20},
  isbn      = {978-3-642-02325-5},
  year      = {2009},
  date      = {2009-01-01},
  booktitle = {8th International Workshop on Multiple Classifier Systems, MCS 2009},
  volume    = {5519},
  pages     = {191--200},
  publisher = {Springer-Verlag},
  address   = {Reykjavik, Iceland},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods, Support vector machines},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2008
García-Osorio, César; García-Pedrajas, Nicolás
Constructing ensembles of classifiers using linear projections based on misclassified instances Inproceedings
In: Verleysen, Michel (Ed.): 16th European Symposium on Artificial Neural Networks (ESANN 2008), pp. 283–288, d-side publications, Bruges, Belgium, 2008, ISBN: 2-930307-08-0.
BibTeX | Tags: Classifier ensembles, Data Mining, Ensemble methods, Linear projections
@inproceedings{ESANN08,
  title     = {Constructing ensembles of classifiers using linear projections based on misclassified instances},
  author    = {César García-Osorio and Nicolás García-Pedrajas},
  editor    = {Michel Verleysen},
  isbn      = {2-930307-08-0},
  year      = {2008},
  date      = {2008-04-01},
  booktitle = {16th European Symposium on Artificial Neural Networks (ESANN 2008)},
  pages     = {283--288},
  publisher = {d-side publications},
  address   = {Bruges, Belgium},
  keywords  = {Classifier ensembles, Data Mining, Ensemble methods, Linear projections},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Maudes-Raedo, Jesús; Rodríguez, Juan José; García-Osorio, César
Disturbing Neighbors Diversity for Decision Forest Inproceedings
In: Valentini, Giorgio; Okun, Oleg (Ed.): Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA 2008), pp. 67–71, Patras, Greece, 2008, ISBN: 978-84-612-4475-1.
BibTeX | Tags: Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods
@inproceedings{SUEMA2008:DisturbingNeighbors,
  title     = {Disturbing Neighbors Diversity for Decision Forest},
  author    = {Jesús Maudes-Raedo and Juan José Rodríguez and César García-Osorio},
  editor    = {Giorgio Valentini and Oleg Okun},
  isbn      = {978-84-612-4475-1},
  year      = {2008},
  date      = {2008-01-01},
  booktitle = {Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA 2008)},
  pages     = {67--71},
  address   = {Patras, Greece},
  keywords  = {Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2007
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Cascading with VDM and Binary Decision Trees for Nominal Data Inproceedings
In: Okun, Oleg; Valentini, Giorgio (Ed.): Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA'2007), pp. 28–42, Girona, Spain, 2007.
BibTeX | Tags: Cascading, Data Mining, Ensemble methods
@inproceedings{MRG07b,
  title     = {Cascading with VDM and Binary Decision Trees for Nominal Data},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio},
  editor    = {Oleg Okun and Giorgio Valentini},
  year      = {2007},
  date      = {2007-06-01},
  booktitle = {Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA'2007)},
  pages     = {28--42},
  address   = {Girona, Spain},
  keywords  = {Cascading, Data Mining, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
García-Pedrajas, Nicolás; García-Osorio, César; Fyfe, Colin
Nonlinear "boosting" projections for ensemble construction Journal Article
In: Journal of Machine Learning Research, vol. 8, pp. 1–33, 2007, ISSN: 1532-4435.
Abstract | Links | BibTeX | Tags: Boosting, Classifier ensembles, Data Mining, Ensemble methods, Neural networks, Nonlinear projections
@article{cgosorio07boosting,
  title     = {Nonlinear ``boosting'' projections for ensemble construction},
  author    = {Nicolás García-Pedrajas and César García-Osorio and Colin Fyfe},
  url       = {http://jmlr.csail.mit.edu/papers/volume8/garcia-pedrajas07a/garcia-pedrajas07a.pdf},
  issn      = {1532-4435},
  year      = {2007},
  date      = {2007-01-01},
  journal   = {Journal of Machine Learning Research},
  volume    = {8},
  pages     = {1--33},
  abstract  = {In this paper we propose a novel approach for ensemble construction based on the use of nonlinear
projections to achieve both accuracy and diversity of individual classifiers. The proposed approach
combines the philosophy of boosting, putting more effort on difficult instances, with the basis of
the random subspace method. Our main contribution is that instead of using a random subspace,
we construct a projection taking into account the instances which have posed most difficulties to
previous classifiers. In this way, consecutive nonlinear projections are created by a neural network
trained using only incorrectly classified instances. The feature subspace induced by the hidden layer
of this network is used as the input space to a new classifier. The method is compared with bagging
and boosting techniques, showing an improved performance on a large set of 44 problems from the
UCI Machine Learning Repository. An additional study showed that the proposed approach is less
sensitive to noise in the data than boosting methods.},
  keywords  = {Boosting, Classifier ensembles, Data Mining, Ensemble methods, Neural networks, Nonlinear projections},
  pubstate  = {published},
  tppubtype = {article}
}
projections to achieve both accuracy and diversity of individual classifiers. The proposed approach
combines the philosophy of boosting, putting more effort on difficult instances, with the basis of
the random subspace method. Our main contribution is that instead of using a random subspace,
we construct a projection taking into account the instances which have posed most difficulties to
previous classifiers. In this way, consecutive nonlinear projections are created by a neural network
trained using only incorrectly classified instances. The feature subspace induced by the hidden layer
of this network is used as the input space to a new classifier. The method is compared with bagging
and boosting techniques, showing an improved performance on a large set of 44 problems from the
UCI Machine Learning Repository. An additional study showed that the proposed approach is less
sensitive to noise in the data than boosting methods.
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Cascading for Nominal Data Inproceedings
In: 7th International Workshop on Multiple Classifier Systems, MCS 2007, pp. 231–240, Springer-Verlag, Prague, Czech Republic, 2007, ISSN: 0302-9743.
Links | BibTeX | Tags: Cascading, Data Mining, Ensemble methods
@inproceedings{CascadingLNCS2007,
  title     = {Cascading for Nominal Data},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio},
  doi       = {10.1007/978-3-540-72523-7_24},
  issn      = {0302-9743},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {7th International Workshop on Multiple Classifier Systems, MCS 2007},
  volume    = {4472},
  pages     = {231--240},
  publisher = {Springer-Verlag},
  address   = {Prague, Czech Republic},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Cascading, Data Mining, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}