2015
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Random Balance: Ensembles of variable priors classifiers for imbalanced data Journal Article
In: Knowledge-Based Systems, vol. 85, pp. 96-111, 2015, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SMOTE, Undersampling
% Journal article: Knowledge-Based Systems, vol. 85 (2015).
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@article{RandomBalance,
  title     = {{Random Balance}: Ensembles of variable priors classifiers for imbalanced data},
  author    = {Díez-Pastor, José Francisco and Rodríguez, Juan José and García-Osorio, César and Kuncheva, Ludmila I},
  url       = {http://www.sciencedirect.com/science/article/pii/S0950705115001720},
  doi       = {10.1016/j.knosys.2015.04.022},
  issn      = {0950-7051},
  year      = {2015},
  date      = {2015-01-01},
  journal   = {Knowledge-Based Systems},
  volume    = {85},
  pages     = {96--111},
  abstract  = {In Machine Learning, a data set is imbalanced when the class proportions are highly skewed. Class-imbalanced problems sets arise routinely in many application domains and pose a challenge to traditional classifiers. We propose a new approach to building ensembles of classifiers for two-class imbalanced data sets, called Random Balance. Each member of the Random Balance ensemble is trained with data sampled from the training set and augmented by artificial instances obtained using SMOTE. The novelty in the approach is that the proportions of the classes for each ensemble member are chosen randomly. The intuition behind the method is that the proposed diversity heuristic will ensure that the ensemble contains classifiers that are specialized for different operating points on the ROC space, thereby leading to larger AUC compared to other ensembles of classifiers. Experiments have been carried out to test the Random Balance approach by itself, and also in combination with standard ensemble methods. As a result, we propose a new ensemble creation method called RB-Boost which combines Random Balance with AdaBoost.M2. This combination involves enforcing random class proportions in addition to instance re-weighting. Experiments with 86 imbalanced data sets from two well known repositories demonstrate the advantage of the Random Balance approach.},
  keywords  = {AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SMOTE, Undersampling},
  pubstate  = {published},
  tppubtype = {article}
}
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article
In: Information Sciences, vol. 325, pp. 98-117, 2015, ISSN: 0020-0255.
Abstract | Links | BibTeX | Tags: Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling
% Journal article: Information Sciences, vol. 325 (2015).
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@article{DiezPastor201598,
  title     = {Diversity techniques improve the performance of the best imbalance learning ensembles},
  author    = {Díez-Pastor, José Francisco and Rodríguez, Juan José and García-Osorio, César and Kuncheva, Ludmila I},
  url       = {http://www.sciencedirect.com/science/article/pii/S0020025515005186},
  doi       = {10.1016/j.ins.2015.07.025},
  issn      = {0020-0255},
  year      = {2015},
  date      = {2015-01-01},
  journal   = {Information Sciences},
  volume    = {325},
  pages     = {98--117},
  abstract  = {Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques.},
  keywords  = {Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling},
  pubstate  = {published},
  tppubtype = {article}
}
2013
García-Pedrajas, Nicolás; García-Osorio, César
Boosting for class-imbalanced datasets using genetically evolved supervised non-linear projections Journal Article
In: Progress in Artificial Intelligence, vol. 2, no. 1, pp. 29-44, 2013, ISSN: 2192-6352.
Links | BibTeX | Tags: Boosting, Class-imbalanced problems, Data Mining, Real-coded genetic algorithms
% Journal article: Progress in Artificial Intelligence, vol. 2, no. 1 (2013).
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@article{PedrajasOsorio2013,
  title     = {Boosting for class-imbalanced datasets using genetically evolved supervised non-linear projections},
  author    = {García-Pedrajas, Nicolás and García-Osorio, César},
  url       = {http://dx.doi.org/10.1007/s13748-012-0028-4},
  doi       = {10.1007/s13748-012-0028-4},
  issn      = {2192-6352},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Progress in Artificial Intelligence},
  volume    = {2},
  number    = {1},
  pages     = {29--44},
  publisher = {Springer-Verlag},
  keywords  = {Boosting, Class-imbalanced problems, Data Mining, Real-coded genetic algorithms},
  pubstate  = {published},
  tppubtype = {article}
}
Díez-Pastor, José Francisco; García-Osorio, César; Barbero-García, Víctor; Blanco-Álamo, Alan
Imbalanced Learning Ensembles for Defect Detection in X-Ray Images Inproceedings
In: Ali, Moonis; Bosse, Tibor; Hindriks, Koen V; Hoogendoorn, Mark; Jonker, Catholijn M; Treur, Jan (Ed.): 26th International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2013, pp. 654-663, Amsterdam, The Netherlands, 2013, ISBN: 978-3-642-38576-6.
Links | BibTeX | Tags: Applied Machine Learning, Business intelligence, Class-imbalanced problems, Data Mining
% Conference paper: IEA/AIE 2013.
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@inproceedings{ieaaieDiez-PastorGBB13,
  title     = {Imbalanced Learning Ensembles for Defect Detection in X-Ray Images},
  author    = {Díez-Pastor, José Francisco and García-Osorio, César and Barbero-García, Víctor and Blanco-Álamo, Alan},
  editor    = {Ali, Moonis and Bosse, Tibor and Hindriks, Koen V and Hoogendoorn, Mark and Jonker, Catholijn M and Treur, Jan},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84881385369&partnerID=40&md5=a5b5f8ad1a108c9da02b51a1346ddb10},
  doi       = {10.1007/978-3-642-38577-3_68},
  isbn      = {978-3-642-38576-6},
  year      = {2013},
  date      = {2013-01-01},
  booktitle = {26th International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2013},
  pages     = {654--663},
  address   = {Amsterdam, The Netherlands},
  keywords  = {Applied Machine Learning, Business intelligence, Class-imbalanced problems, Data Mining},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Random Oracle Ensembles for Imbalanced Data Inproceedings
In: Zhou, Zhi-Hua; Roli, Fabio; Kittler, Josef (Ed.): 11th International Workshop on Multiple Classifier Systems, MCS 2013, pp. 247-258, Nanjing, China, 2013, ISBN: 978-3-642-38066-2.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Ensemble methods, Random oracles
% Conference paper: MCS 2013.
% NOTE(review): crossref points to mcs2013, which is not defined in the visible part of this
% file; confirm the parent entry exists (classic BibTeX also needs it to appear after this entry).
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@inproceedings{mcsRodriguezDG13,
  title     = {Random Oracle Ensembles for Imbalanced Data},
  author    = {Rodríguez, Juan José and Díez-Pastor, José Francisco and García-Osorio, César},
  editor    = {Zhou, Zhi-Hua and Roli, Fabio and Kittler, Josef},
  doi       = {10.1007/978-3-642-38067-9_22},
  isbn      = {978-3-642-38066-2},
  year      = {2013},
  date      = {2013-01-01},
  booktitle = {11th International Workshop on Multiple Classifier Systems, MCS 2013},
  pages     = {247--258},
  address   = {Nanjing, China},
  crossref  = {mcs2013},
  keywords  = {Class-imbalanced problems, Data Mining, Ensemble methods, Random oracles},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2012
Rodríguez, Juan José; Díez-Pastor, José Francisco; Maudes, Jesús; García-Osorio, César
Disturbing Neighbors Ensembles of Trees for Imbalanced Data Inproceedings
In: Wani, Arif M; Khoshgoftaar, Taghi; Zhu, Xingquan (Hill); Seliya, Naeem (Ed.): 11th International Conference on Machine Learning and Applications, ICMLA 2012, pp. 83-88, IEEE, Boca Raton, USA, 2012, ISBN: 978-0-7695-4913-2.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods
% Conference paper: ICMLA 2012.
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@inproceedings{RDMG12,
  title     = {Disturbing Neighbors Ensembles of Trees for Imbalanced Data},
  author    = {Rodríguez, Juan José and Díez-Pastor, José Francisco and Maudes, Jesús and García-Osorio, César},
  editor    = {Wani, Arif M and Khoshgoftaar, Taghi and Zhu, Xingquan (Hill) and Seliya, Naeem},
  doi       = {10.1109/ICMLA.2012.181},
  isbn      = {978-0-7695-4913-2},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {11th International Conference on Machine Learning and Applications, ICMLA 2012},
  volume    = {2},
  pages     = {83--88},
  publisher = {IEEE},
  address   = {Boca Raton, USA},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2011
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Ensembles of Decision Trees for Imbalanced Data Inproceedings
In: Sansone, Carlo; Kittler, Josef; Roli, Fabio (Ed.): 10th International Workshop on Multiple Classifier Systems, MCS 2011, pp. 76-85, Springer-Verlag, Naples, Italy, 2011, ISSN: 0302-9743.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods
% Conference paper: MCS 2011, Lecture Notes in Computer Science vol. 6713.
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@inproceedings{Rodriguez2011,
  title     = {Ensembles of Decision Trees for Imbalanced Data},
  author    = {Rodríguez, Juan José and Díez-Pastor, José Francisco and García-Osorio, César},
  editor    = {Sansone, Carlo and Kittler, Josef and Roli, Fabio},
  doi       = {10.1007/978-3-642-21557-5_10},
  issn      = {0302-9743},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {10th International Workshop on Multiple Classifier Systems, MCS 2011},
  volume    = {6713},
  pages     = {76--85},
  publisher = {Springer-Verlag},
  address   = {Naples, Italy},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César; Santos, Pedro
Using Model Trees and their Ensembles for Imbalanced Data Inproceedings
In: Lozano, Jose A; Gámez, José A; Moreno, José A (Ed.): Advances in Artificial Intelligence: 14th Conference of the Spanish Association for Artificial Intelligence, CAEPIA 2011, pp. 94–103, Springer, La Laguna, Spain, 2011, ISBN: 978-3-642-25273-0.
BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods
% Conference paper: CAEPIA 2011, Lecture Notes in Computer Science vol. 7023.
% NOTE(review): date looks like an exporter placeholder (Jan 1); year is kept as well.
@inproceedings{RDGS11,
  title     = {Using Model Trees and their Ensembles for Imbalanced Data},
  author    = {Rodríguez, Juan José and Díez-Pastor, José Francisco and García-Osorio, César and Santos, Pedro},
  editor    = {Lozano, Jose A and Gámez, José A and Moreno, José A},
  isbn      = {978-3-642-25273-0},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {Advances in Artificial Intelligence: 14th Conference of the Spanish Association for Artificial Intelligence, CAEPIA 2011},
  volume    = {7023},
  pages     = {94--103},
  publisher = {Springer},
  address   = {La Laguna, Spain},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}