2021
Juez-Gil, Mario; Arnaiz-González, Álvar; Rodríguez, Juan José; López-Nozal, Carlos; García-Osorio, César
Approx-SMOTE: Fast SMOTE for Big Data on Apache Spark Journal Article
In: Neurocomputing, vol. 464, pp. 432-437, 2021, ISSN: 0925-2312.
Abstract | Links | BibTeX | Tags: Big data, Data Mining, imbalance, SMOTE, Spark
@article{Juez-Gil2021bb,
title = {Approx-SMOTE: Fast SMOTE for Big Data on Apache Spark},
author = {Mario Juez-Gil and Álvar Arnaiz-González and Juan José Rodríguez and Carlos López-Nozal and César García-Osorio},
url = {https://www.sciencedirect.com/science/article/pii/S0925231221012832},
doi = {10.1016/j.neucom.2021.08.086},
issn = {0925-2312},
year = {2021},
date = {2021-11-13},
journal = {Neurocomputing},
volume = {464},
pages = {432--437},
abstract = {One of the main goals of Big Data research, is to find new data mining methods that are able to process large amounts of data in acceptable times. In Big Data classification, as in traditional classification, class imbalance is a common problem that must be addressed, in the case of Big Data also looking for a solution that can be applied in an acceptable execution time. In this paper we present Approx-SMOTE, a parallel implementation of the SMOTE algorithm for the Apache Spark framework. The key difference with the original SMOTE, besides parallelism, is that it uses an approximated version of k-Nearest Neighbor which makes it highly scalable. Although an implementation of SMOTE for Big Data already exists (SMOTE-BD), it uses an exact Nearest Neighbor search, which does not make it entirely scalable. Approx-SMOTE on the other hand is able to achieve up to 30 times faster run times without sacrificing the improved classification performance offered by the original SMOTE.},
keywords = {Big data, Data Mining, imbalance, SMOTE, Spark},
pubstate = {published},
tppubtype = {article}
}
2018
Güemes-Peña, Diego; López-Nozal, Carlos; Marticorena-Sánchez, Raúl; Maudes-Raedo, Jesús
Emerging topics in mining software repositories Journal Article
In: Progress in Artificial Intelligence, pp. 1-11, 2018, ISSN: 2192-6360.
Abstract | Links | BibTeX | Tags: Data Mining, Machine learning, Software engineering, Software process, Software repository
@article{Güemes-Peña2018,
title = {Emerging topics in mining software repositories},
author = {Diego Güemes-Peña and Carlos López-Nozal and Raúl Marticorena-Sánchez and Jesús Maudes-Raedo},
url = {https://link.springer.com/content/pdf/10.1007/s13748-018-0147-7.pdf},
doi = {10.1007/s13748-018-0147-7},
issn = {2192-6360},
year = {2018},
date = {2018-01-01},
journal = {Progress in Artificial Intelligence},
pages = {1--11},
abstract = {A software process is a set of related activities that culminates in the production of a software package: specification, design, implementation, testing, evolution into new versions, and maintenance. There are also other supporting activities such as configuration and change management, quality assurance, project management, evaluation of user experience, etc. Software repositories are infrastructures to support all these activities. They can be composed with several systems that include code change management, bug tracking, code review, build system, release binaries, wikis, forums, etc. This position paper on mining software repositories presents a review and a discussion of research in this field over the past decade. We also identify applied machine learning strategies, current working topics, and future challenges for the improvement of company decision-making systems. Machine learning is defined as the process of discovering patterns in data. It can be applied to software repositories, since every change is recorded as data. Companies can then use these patterns as the basis for their decision-making systems and for knowledge discovery.},
keywords = {Data Mining, Machine learning, Software engineering, Software process, Software repository},
pubstate = {published},
tppubtype = {article}
}
2017
Maudes, Jesus; Bustillo, Andrés; Guerra, Antonio J; Ciurana, Joaquim
Random Forest ensemble prediction of stent dimensions in microfabrication processes Journal Article
In: The International Journal of Advanced Manufacturing Technology, vol. 91, no. 1, pp. 879–893, 2017, ISSN: 1433-3015.
Abstract | Links | BibTeX | Tags: Data Mining, Ensembles of regressors, Random forest, Regression trees, Stents Laser machining
@article{Maudes2017,
title = {Random Forest ensemble prediction of stent dimensions in microfabrication processes},
author = {Jesús Maudes and Andrés Bustillo and Antonio J Guerra and Joaquim Ciurana},
url = {https://doi.org/10.1007/s00170-016-9695-9},
doi = {10.1007/s00170-016-9695-9},
issn = {1433-3015},
year = {2017},
date = {2017-07-01},
journal = {The International Journal of Advanced Manufacturing Technology},
volume = {91},
number = {1},
pages = {879--893},
abstract = {The recent development of new laser machine tools for the manufacture of micro-scale metallic components has boosted demand in the field of medical applications. However, the optimization of this process encounters a major problem: a knowledge gap concerning the relation between the controllable parameters of these machine tools and the quality of the machined components. Our research proposes a two-step strategy to approach this problem for the manufacture of stents. First, a screening test identifies good and bad performance conditions for the laser process and generates useful information on cutting performance; then, a stent is manufactured under different cutting conditions and the most accurate machine learning technique to model this process is identified. This strategy is validated with the performance of experiments that vary pulse duration, laser power, and cutting speed, and measure two geometrical characteristics of the stent geometry. The results showed that linear Support Vector Machines can identify good and bad cutting conditions, while Random Forest ensembles of regression trees can predict with high accuracy the two characteristics of the stent geometry under study. Besides, this technique can extract useful information from the screening test that improves its final accuracy. In view of the small dataset size, an alternative based on the leave-one-out technique was used, instead of standard cross validation, so as to assure the generalization capability of the models.},
keywords = {Data Mining, Ensembles of regressors, Random forest, Regression trees, Stents Laser machining},
pubstate = {published},
tppubtype = {article}
}
2016
Arnaiz-González, Álvar; Blachnik, Marcin; Kordos, Mirosław; García-Osorio, César
Fusion of instance selection methods in regression tasks Journal Article
In: Information Fusion, vol. 30, pp. 69 - 79, 2016, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Data Mining, Ensemble methods, Instance selection, Regression
@article{ArnaizGonzalez201669,
title = {Fusion of instance selection methods in regression tasks},
author = {Álvar Arnaiz-González and Marcin Blachnik and Mirosław Kordos and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S1566253515001141},
doi = {10.1016/j.inffus.2015.12.002},
issn = {1566-2535},
year = {2016},
date = {2016-01-01},
journal = {Information Fusion},
volume = {30},
pages = {69--79},
abstract = {Data pre-processing is a very important aspect of data mining. In this paper we discuss instance selection used for prediction algorithms, which is one of the pre-processing approaches. The purpose of instance selection is to improve the data quality by data size reduction and noise elimination. Until recently, instance selection has been applied mainly to classification problems. Very few recent papers address instance selection for regression tasks. This paper proposes fusion of instance selection algorithms for regression tasks to improve the selection performance. As the members of the ensemble two different families of instance selection methods are evaluated: one based on distance threshold and the other one on converting the regression task into a multiple class classification task. Extensive experimental evaluation performed on the two regression versions of the Edited Nearest Neighbor (ENN) and Condensed Nearest Neighbor (CNN) methods showed that the best performance measured by the error value and data size reduction are in most cases obtained for the ensemble methods.},
keywords = {Data Mining, Ensemble methods, Instance selection, Regression},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection for regression by discretization Journal Article
In: Expert Systems With Applications, 2016, ISSN: 0957-4174.
Links | BibTeX | Tags: Data Mining, Instance selection, Regression
@article{ArnaizGonzalez201669b,
title = {Instance selection for regression by discretization},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
doi = {10.1016/j.eswa.2015.12.046},
issn = {0957-4174},
year = {2016},
date = {2016-01-01},
journal = {Expert Systems with Applications},
keywords = {Data Mining, Instance selection, Regression},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection for regression: Adapting DROP Journal Article
In: Neurocomputing, vol. 201, pp. 66–81, 2016, ISSN: 0925-2312.
Abstract | Links | BibTeX | Tags: Data Mining, DROP, Instance selection, Noise filtering, Regression
@article{ArnaizGonzález2016,
title = {Instance selection for regression: Adapting DROP},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S0925231216301953},
doi = {10.1016/j.neucom.2016.04.003},
issn = {0925-2312},
year = {2016},
date = {2016-01-01},
journal = {Neurocomputing},
volume = {201},
pages = {66--81},
abstract = {Machine Learning has two central processes of interest that captivate the scientific community: classification and regression. Although instance selection for classification has shown its usefulness and has been researched in depth, instance selection for regression has not followed the same path and there are few published algorithms on the subject. In this paper, we propose that various adaptations of DROP, a well-known family of instance selection methods for classification, be applied to regression. Their behaviour is analysed using a broad range of datasets. The results are presented of the analysis of four new proposals for the reduction of dataset size, the effect on error when several classifiers are trained with the reduced dataset, and their robustness against noise. This last aspect is especially important, since in real life, it is frequent that the registered data be inexact and present distortions due to different causes: errors in the measurement tools, typos when writing results, existence of outliers and spurious readings, corruption in files, etc. When the datasets are small it is possible to manually correct these problems, but for big and huge datasets is better to have automatic methods to deal with these problems. In the experimental part, the proposed methods are found to be quite robust to noise.},
keywords = {Data Mining, DROP, Instance selection, Noise filtering, Regression},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection of linear complexity for big data Journal Article
In: Knowledge-Based Systems, vol. 107, pp. 83–95, 2016, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: Big data, Data Mining, Data reduction, Hashing, Instance selection, Nearest neighbors
@article{ArnaizGonzálezLSHIS2016,
title = {Instance selection of linear complexity for big data},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio},
url = {http://www.sciencedirect.com/science/article/pii/S0950705116301617},
doi = {10.1016/j.knosys.2016.05.056},
issn = {0950-7051},
year = {2016},
date = {2016-01-01},
journal = {Knowledge-Based Systems},
volume = {107},
pages = {83--95},
abstract = {Over recent decades, database sizes have grown considerably. Larger sizes present new challenges, because machine learning algorithms are not prepared to process such large volumes of information. Instance selection methods can alleviate this problem when the size of the data set is medium to large. However, even these methods face similar problems with very large-to-massive data sets. In this paper, two new algorithms with linear complexity for instance selection purposes are presented. Both algorithms use locality-sensitive hashing to find similarities between instances. While the complexity of conventional methods (usually quadratic, $O(n^2)$, or log-linear, $O(n \log n)$) means that they are unable to process large-sized data sets, the new proposal shows competitive results in terms of accuracy. Even more remarkably, it shortens execution time, as the proposal manages to reduce complexity and make it linear with respect to the data set size. The new proposal has been compared with some of the best known instance selection methods for testing and has also been evaluated on large data sets (up to a million instances).},
keywords = {Big data, Data Mining, Data reduction, Hashing, Instance selection, Nearest neighbors},
pubstate = {published},
tppubtype = {article}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Random feature weights for regression trees Journal Article
In: Progress in Artificial Intelligence, vol. 5, no. 2, pp. 91–103, 2016, ISSN: 2192-6360.
Abstract | Links | BibTeX | Tags: Data Mining, Ensemble methods, Regression
@article{Arnaiz-González2016,
  author     = {Álvar Arnaiz-González and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
  title      = {Random feature weights for regression trees},
  journal    = {Progress in Artificial Intelligence},
  volume     = {5},
  number     = {2},
  pages      = {91--103},
  year       = {2016},
  date       = {2016-01-01},
  issn       = {2192-6360},
  doi        = {10.1007/s13748-016-0081-5},
  url        = {http://dx.doi.org/10.1007/s13748-016-0081-5},
  keywords   = {Data Mining, Ensemble methods, Regression},
  abstract   = {Ensembles are learning methods the operation of which relies on a combination of different base models. The diversity of ensembles is a fundamental aspect that conditions their operation. Random Feature Weights RFW was proposed as a classification-tree ensemble construction method in which diversity is introduced into each tree by means of a random weight associated with each attribute. These weights vary from one tree to another in the ensemble. In this article, the idea of RFW is adapted to decision-tree regression. A comparison is drawn with other ensemble construction methods: Bagging, Random Forest, Iterated Bagging, Random Subspaces and AdaBoost.R2 obtaining competitive results.},
  pubstate   = {published},
  tppubtype  = {article}
}
2015
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Random Balance: Ensembles of variable priors classifiers for imbalanced data Journal Article
In: Knowledge-Based Systems, vol. 85, pp. 96-111, 2015, ISSN: 0950-7051.
Abstract | Links | BibTeX | Tags: AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SMOTE, Undersampling
@article{RandomBalance,
title = {Random Balance: Ensembles of variable priors classifiers for imbalanced data},
author = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
url = {http://www.sciencedirect.com/science/article/pii/S0950705115001720},
doi = {10.1016/j.knosys.2015.04.022},
issn = {0950-7051},
year = {2015},
date = {2015-01-01},
journal = {Knowledge-Based Systems},
volume = {85},
pages = {96--111},
abstract = {In Machine Learning, a data set is imbalanced when the class proportions are highly skewed. Class-imbalanced problems sets arise routinely in many application domains and pose a challenge to traditional classifiers. We propose a new approach to building ensembles of classifiers for two-class imbalanced data sets, called Random Balance. Each member of the Random Balance ensemble is trained with data sampled from the training set and augmented by artificial instances obtained using SMOTE. The novelty in the approach is that the proportions of the classes for each ensemble member are chosen randomly. The intuition behind the method is that the proposed diversity heuristic will ensure that the ensemble contains classifiers that are specialized for different operating points on the ROC space, thereby leading to larger AUC compared to other ensembles of classifiers. Experiments have been carried out to test the Random Balance approach by itself, and also in combination with standard ensemble methods. As a result, we propose a new ensemble creation method called RB-Boost which combines Random Balance with AdaBoost.M2. This combination involves enforcing random class proportions in addition to instance re-weighting. Experiments with 86 imbalanced data sets from two well known repositories demonstrate the advantage of the Random Balance approach.},
keywords = {AdaBoost, Bagging, Class-imbalanced problems, Classifier ensembles, Data Mining, Ensemble methods, SMOTE, Undersampling},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César; Kuncheva, Ludmila I
Diversity techniques improve the performance of the best imbalance learning ensembles Journal Article
In: Information Sciences, vol. 325, pp. 98 - 117, 2015, ISSN: 0020-0255.
Abstract | Links | BibTeX | Tags: Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling
@article{DiezPastor201598,
title = {Diversity techniques improve the performance of the best imbalance learning ensembles},
author = {José Francisco Díez-Pastor and Juan José Rodríguez and César García-Osorio and Ludmila I Kuncheva},
url = {http://www.sciencedirect.com/science/article/pii/S0020025515005186},
doi = {10.1016/j.ins.2015.07.025},
issn = {0020-0255},
year = {2015},
date = {2015-01-01},
journal = {Information Sciences},
volume = {325},
pages = {98--117},
abstract = {Many real-life problems can be described as unbalanced, where the number of instances belonging to one of the classes is much larger than the numbers in other classes. Examples are spam detection, credit card fraud detection or medical diagnosis. Ensembles of classifiers have acquired popularity in this kind of problems for their ability to obtain better results than individual classifiers. The most commonly used techniques by those ensembles especially designed to deal with imbalanced problems are for example Re-weighting, Oversampling and Undersampling. Other techniques, originally intended to increase the ensemble diversity, have not been systematically studied for their effect on imbalanced problems. Among these are Random Oracles, Disturbing Neighbors, Random Feature Weights or Rotation Forest. This paper presents an overview and an experimental study of various ensemble-based methods for imbalanced problems, the methods have been tested in its original form and in conjunction with several diversity-increasing techniques, using 84 imbalanced data sets from two well known repositories. This paper shows that these diversity-increasing techniques significantly improve the performance of ensemble methods for imbalanced problems and provides some ideas about when it is more convenient to use these diversifying techniques.},
keywords = {Class-imbalanced problems, Classifier ensembles, Data Mining, Diversity, Ensemble methods, Rotation forest, SMOTE, Undersampling},
pubstate = {published},
tppubtype = {article}
}
2014
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Tree ensemble construction using a GRASP-based heuristic and annealed randomness Journal Article
In: Information Fusion, vol. 20, no. 0, pp. 189–202, 2014, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metaheuristic, Random forest
@article{DiezPastor2014,
title = {Tree ensemble construction using a GRASP-based heuristic and annealed randomness},
author = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
url = {http://www.sciencedirect.com/science/article/pii/S1566253514000141},
doi = {10.1016/j.inffus.2014.01.009},
issn = {1566-2535},
year = {2014},
date = {2014-01-01},
journal = {Information Fusion},
volume = {20},
pages = {189--202},
abstract = {Two new methods for tree ensemble construction are presented: G-Forest and GAR-Forest. In a similar way to Random Forest, the tree construction process entails a degree of randomness. The same strategy used in the GRASP metaheuristic for generating random and adaptive solutions is used at each node of the trees. The source of diversity of the ensemble is the randomness of the solution generation method of GRASP. A further key feature of the tree construction method for GAR-Forest is a decreasing level of randomness during the process of constructing the tree: maximum randomness at the root and minimum randomness at the leaves. The method is therefore named ``GAR'', GRASP with annealed randomness. The results conclusively demonstrate that G-Forest and GAR-Forest outperform Bagging, AdaBoost, MultiBoost, Random Forest and Random Subspaces. The results are even more convincing in the presence of noise, demonstrating the robustness of the method. The relationship between base classifier accuracy and their diversity is analysed by application of kappa-error diagrams and a variant of these called kappa-error relative movement diagrams.},
keywords = {Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, GRASP metaheuristic, Random forest},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; Arnaiz-González, Álvar; García-Osorio, César; Rodríguez, Juan José
Segmentación de defectos en piezas de fundido usando umbrales adaptativos y ensembles Inproceedings
In: XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014, pp. 345-350, Zaragoza, Spain, 2014, ISBN: 978-84-15688-76-1.
BibTeX | Tags: Applied Machine Learning, Business intelligence, Data Mining
@inproceedings{ESTYLF2014a,
title = {Segmentación de defectos en piezas de fundido usando umbrales adaptativos y ensembles},
author = {José Francisco Díez-Pastor and Álvar Arnaiz-González and César García-Osorio and Juan José Rodríguez},
isbn = {978-84-15688-76-1},
year = {2014},
date = {2014-01-01},
booktitle = {XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014},
pages = {345--350},
address = {Zaragoza, Spain},
keywords = {Applied Machine Learning, Business intelligence, Data Mining},
pubstate = {published},
tppubtype = {inproceedings}
}
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Selección de instancias en regresión mediante discretización Inproceedings
In: XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014, pp. 351-356, Zaragoza, Spain, 2014, ISBN: 978-84-15688-76-1.
BibTeX | Tags: Data Mining, Instance selection
@inproceedings{ESTYLF2014b,
title = {Selección de instancias en regresión mediante discretización},
author = {Álvar Arnaiz-González and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
isbn = {978-84-15688-76-1},
year = {2014},
date = {2014-01-01},
booktitle = {XVII congreso español sobre tecnologías y lógica fuzzy, ESTYLF 2014},
pages = {351--356},
address = {Zaragoza, Spain},
keywords = {Data Mining, Instance selection},
pubstate = {published},
tppubtype = {inproceedings}
}
2013
García-Pedrajas, Nicolás; García-Osorio, César
Boosting for class-imbalanced datasets using genetically evolved supervised non-linear projections Journal Article
In: Progress in Artificial Intelligence, vol. 2, no. 1, pp. 29-44, 2013, ISSN: 2192-6352.
Links | BibTeX | Tags: Boosting, Class-imbalanced problems, Data Mining, Real-coded genetic algorithms
@article{PedrajasOsorio2013,
title = {Boosting for class-imbalanced datasets using genetically evolved supervised non-linear projections},
author = {Nicolás García-Pedrajas and César García-Osorio},
url = {http://dx.doi.org/10.1007/s13748-012-0028-4},
doi = {10.1007/s13748-012-0028-4},
issn = {2192-6352},
year = {2013},
date = {2013-01-01},
journal = {Progress in Artificial Intelligence},
volume = {2},
number = {1},
pages = {29--44},
publisher = {Springer-Verlag},
keywords = {Boosting, Class-imbalanced problems, Data Mining, Real-coded genetic algorithms},
pubstate = {published},
tppubtype = {article}
}
Pardo, Carlos; Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
Rotation Forests for regression Journal Article
In: Applied Mathematics and Computation, vol. 219, no. 19, pp. 9914-9924, 2013, ISSN: 0096-3003.
Links | BibTeX | Tags: Data Mining, Regression, Rotation forest
@article{amcPardoDGR13,
title = {Rotation Forests for regression},
author = {Carlos Pardo and José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
doi = {10.1016/j.amc.2013.03.139},
issn = {0096-3003},
year = {2013},
date = {2013-01-01},
journal = {Applied Mathematics and Computation},
volume = {219},
number = {19},
pages = {9914--9924},
keywords = {Data Mining, Regression, Rotation forest},
pubstate = {published},
tppubtype = {article}
}
Díez-Pastor, José Francisco; García-Osorio, César; Barbero-García, Víctor; Blanco-Álamo, Alan
Imbalanced Learning Ensembles for Defect Detection in X-Ray Images Inproceedings
In: Ali, Moonis; Bosse, Tibor; Hindriks, Koen V; Hoogendoorn, Mark; Jonker, Catholijn M; Treur, Jan (Ed.): 26th International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2013, pp. 654-663, Amsterdam, The Netherlands, 2013, ISBN: 978-3-642-38576-6.
Links | BibTeX | Tags: Applied Machine Learning, Business intelligence, Class-imbalanced problems, Data Mining
@inproceedings{ieaaieDiez-PastorGBB13,
title = {Imbalanced Learning Ensembles for Defect Detection in X-Ray Images},
author = {José Francisco Díez-Pastor and César García-Osorio and Víctor Barbero-García and Alan Blanco-Álamo},
editor = {Moonis Ali and Tibor Bosse and Koen V Hindriks and Mark Hoogendoorn and Catholijn M Jonker and Jan Treur},
url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84881385369&partnerID=40&md5=a5b5f8ad1a108c9da02b51a1346ddb10},
doi = {10.1007/978-3-642-38577-3_68},
isbn = {978-3-642-38576-6},
year = {2013},
date = {2013-01-01},
booktitle = {26th International Conference on Industrial, Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2013},
pages = {654--663},
address = {Amsterdam, The Netherlands},
keywords = {Applied Machine Learning, Business intelligence, Class-imbalanced problems, Data Mining},
pubstate = {published},
tppubtype = {inproceedings}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Random Oracle Ensembles for Imbalanced Data Inproceedings
In: Zhou, Zhi-Hua; Roli, Fabio; Kittler, Josef (Ed.): 11th International Workshop on Multiple Classifier Systems, MCS 2013, pp. 247-258, Nanjing, China, 2013, ISBN: 978-3-642-38066-2.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Ensemble methods, Random oracles
@inproceedings{mcsRodriguezDG13,
title = {Random Oracle Ensembles for Imbalanced Data},
author = {Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
editor = {Zhi-Hua Zhou and Fabio Roli and Josef Kittler},
doi = {10.1007/978-3-642-38067-9_22},
isbn = {978-3-642-38066-2},
year = {2013},
date = {2013-01-01},
booktitle = {11th International Workshop on Multiple Classifier Systems, MCS 2013},
pages = {247--258},
address = {Nanjing, China},
crossref = {mcs2013},
keywords = {Class-imbalanced problems, Data Mining, Ensemble methods, Random oracles},
pubstate = {published},
tppubtype = {inproceedings}
}
2012
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; García-Pedrajas, Nicolás
Random Feature Weights for Decision Tree Ensemble Construction Journal Article
In: Information Fusion, vol. 13, no. 1, pp. 20-30, 2012, ISSN: 1566-2535.
Links | BibTeX | Tags: Bagging, Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, Random forest
@article{RFW2012,
title = {Random Feature Weights for Decision Tree Ensemble Construction},
author = {Jesús Maudes and Juan José Rodríguez and César García-Osorio and Nicolás García-Pedrajas},
doi = {10.1016/j.inffus.2010.11.004},
issn = {1566-2535},
year = {2012},
date = {2012-01-01},
journal = {Information Fusion},
volume = {13},
number = {1},
pages = {20--30},
keywords = {Bagging, Boosting, Classifier ensembles, Data Mining, Decision trees, Ensemble methods, Random forest},
pubstate = {published},
tppubtype = {article}
}
Pardo, Carlos; Díez-Pastor, José Francisco; García-Pedrajas, Nicolás; Rodríguez, Juan José; García-Osorio, César
Linear projections --- An Experimental Study for Regression Problems Inproceedings
In: Carmona, Pedro Latorre; Sánchez, Salvador J; Fred, Ana (Ed.): 1st International Conference on Pattern Recognition Applications and Methods (ICPRAM), pp. 198–204, SciTePress --- Science and Technology Publications, Vilamoura, Portugal, 2012, ISBN: 978-989-8425-98-0.
BibTeX | Tags: Data Mining, Linear projections, Regression
@inproceedings{ICPRAM2012,
title = {Linear projections --- An Experimental Study for Regression Problems},
author = {Carlos Pardo and José Francisco Díez-Pastor and Nicolás García-Pedrajas and Juan José Rodríguez and César García-Osorio},
editor = {Pedro Latorre Carmona and Salvador J Sánchez and Ana Fred},
isbn = {978-989-8425-98-0},
year = {2012},
date = {2012-01-01},
booktitle = {1st International Conference on Pattern Recognition Applications and Methods (ICPRAM)},
pages = {198--204},
publisher = {SciTePress --- Science and Technology Publications},
address = {Vilamoura, Portugal},
keywords = {Data Mining, Linear projections, Regression},
pubstate = {published},
tppubtype = {inproceedings}
}
Díez-Pastor, José Francisco; Bustillo, Andrés; Quintana, Guillem; García-Osorio, César
Boosting Projections to improve surface roughness prediction in high-torque milling operations Journal Article
In: Soft Computing, vol. 16, no. 8, pp. 1427-1437, 2012, ISSN: 1432-7643 (Print) 1433-7479 (Online).
Links | BibTeX | Tags: Applied Machine Learning, Business intelligence, Data Mining, Ensemble methods
@article{BPforIndustrialData2012,
title = {Boosting Projections to improve surface roughness prediction in high-torque milling operations},
author = {José Francisco Díez-Pastor and Andrés Bustillo and Guillem Quintana and César García-Osorio},
url = {http://dx.doi.org/10.1007/s00500-012-0846-0},
doi = {10.1007/s00500-012-0846-0},
issn = {1432-7643 (Print) 1433-7479 (Online)},
year = {2012},
date = {2012-01-01},
journal = {Soft Computing},
volume = {16},
number = {8},
pages = {1427--1437},
keywords = {Applied Machine Learning, Business intelligence, Data Mining, Ensemble methods},
pubstate = {published},
tppubtype = {article}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; Maudes, Jesús; García-Osorio, César
Disturbing Neighbors Ensembles of Trees for Imbalanced Data Inproceedings
In: Wani, Arif M; Khoshgoftaar, Taghi; Zhu, Xingquan (Hill); Seliya, Naeem (Ed.): 11th International Conference on Machine Learning and Applications, ICMLA 2012, pp. 83-88, IEEE, Boca Raton, USA, 2012, ISBN: 978-0-7695-4913-2.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods
@inproceedings{RDMG12,
  title     = {Disturbing Neighbors Ensembles of Trees for Imbalanced Data},
  author    = {Juan José Rodríguez and José Francisco Díez-Pastor and Jesús Maudes and César García-Osorio},
  editor    = {Arif M Wani and Taghi Khoshgoftaar and Xingquan (Hill) Zhu and Naeem Seliya},
  doi       = {10.1109/ICMLA.2012.181},
  isbn      = {978-0-7695-4913-2},
  year      = {2012},
  date      = {2012-01-01},
  booktitle = {11th International Conference on Machine Learning and Applications, ICMLA 2012},
  volume    = {2},
  pages     = {83--88},
  publisher = {IEEE},
  address   = {Boca Raton, USA},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2011
García-Pedrajas, Nicolás; García-Osorio, César
Constructing ensembles of classifiers using supervised projection methods based on misclassified instances Journal Article
In: Expert Systems with Applications, vol. 38, no. 1, pp. 343–359, 2011, ISSN: 0957-4174.
Links | BibTeX | Tags: Boosting, Classification, Data Mining, Linear projections, Subspace methods
@article{ensemblesProjections2011,
  title     = {Constructing ensembles of classifiers using supervised projection methods based on misclassified instances},
  author    = {Nicolás García-Pedrajas and César García-Osorio},
  url       = {http://www.sciencedirect.com/science/article/B6V03-50GJ2J0-7/2/6b1890282b8bfb900f1174dc7a027a9c},
  doi       = {10.1016/j.eswa.2010.06.072},
  issn      = {0957-4174},
  year      = {2011},
  date      = {2011-01-01},
  journal   = {Expert Systems with Applications},
  volume    = {38},
  number    = {1},
  pages     = {343--359},
  keywords  = {Boosting, Classification, Data Mining, Linear projections, Subspace methods},
  pubstate  = {published},
  tppubtype = {article}
}
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; Pardo, Carlos
Random projections for linear SVM ensembles Journal Article
In: Applied Intelligence, vol. 34, pp. 347-359, 2011, ISSN: 0924-669X, 1573-7497, (10.1007/s10489-011-0283-2).
Links | BibTeX | Tags: Data Mining, Ensemble methods, Support vector machines
@article{RandomProjectionsLinearSVMs,
  title     = {Random projections for linear SVM ensembles},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio and Carlos Pardo},
  url       = {http://dx.doi.org/10.1007/s10489-011-0283-2},
  doi       = {10.1007/s10489-011-0283-2},
  issn      = {0924-669X, 1573-7497},
  year      = {2011},
  date      = {2011-01-01},
  journal   = {Applied Intelligence},
  volume    = {34},
  pages     = {347--359},
  publisher = {Springer Netherlands},
  keywords  = {Data Mining, Ensemble methods, Support vector machines},
  pubstate  = {published},
  tppubtype = {article}
}
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José; Bustillo, Andrés
GRASP Forest: A New Ensemble Method for Trees Inproceedings
In: Sansone, Carlo; Kittler, Josef; Roli, Fabio (Ed.): 10th International Workshop on Multiple Classifier Systems, MCS 2011, pp. 66-75, Springer-Verlag, Naples, Italy, 2011, ISSN: 0302-9743.
Links | BibTeX | Tags: Data Mining, Decision trees, Ensemble methods
@inproceedings{Diez-Pastor2011,
  title     = {GRASP Forest: A New Ensemble Method for Trees},
  author    = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez and Andrés Bustillo},
  editor    = {Carlo Sansone and Josef Kittler and Fabio Roli},
  doi       = {10.1007/978-3-642-21557-5_9},
  issn      = {0302-9743},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {10th International Workshop on Multiple Classifier Systems, MCS 2011},
  volume    = {6713},
  pages     = {66--75},
  publisher = {Springer-Verlag},
  address   = {Naples, Italy},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Ensembles of Decision Trees for Imbalanced Data Inproceedings
In: Sansone, Carlo; Kittler, Josef; Roli, Fabio (Ed.): 10th International Workshop on Multiple Classifier Systems, MCS 2011, pp. 76-85, Springer-Verlag, Naples, Italy, 2011, ISSN: 0302-9743.
Links | BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods
@inproceedings{Rodriguez2011,
  title     = {Ensembles of Decision Trees for Imbalanced Data},
  author    = {Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
  editor    = {Carlo Sansone and Josef Kittler and Fabio Roli},
  doi       = {10.1007/978-3-642-21557-5_10},
  issn      = {0302-9743},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {10th International Workshop on Multiple Classifier Systems, MCS 2011},
  volume    = {6713},
  pages     = {76--85},
  publisher = {Springer-Verlag},
  address   = {Naples, Italy},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pardo, Carlos; Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Random Oracles for Regression Ensembles Book Chapter
In: Okun, Oleg; Valentini, Giorgio; Re, Matteo (Ed.): Ensembles in Machine Learning Applications, vol. 373, pp. 181-199, Springer, 2011, ISBN: 978-3-642-22909-1.
Links | BibTeX | Tags: Data Mining, Random oracles, Regression ensembles
@incollection{PRDG11,
  title     = {Random Oracles for Regression Ensembles},
  author    = {Carlos Pardo and Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
  editor    = {Oleg Okun and Giorgio Valentini and Matteo Re},
  doi       = {10.1007/978-3-642-22910-7_11},
  isbn      = {978-3-642-22909-1},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {Ensembles in Machine Learning Applications},
  volume    = {373},
  pages     = {181--199},
  publisher = {Springer},
  series    = {Studies in Computational Intelligence},
  keywords  = {Data Mining, Random oracles, Regression ensembles},
  pubstate  = {published},
  tppubtype = {incollection}
}
Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César; Santos, Pedro
Using Model Trees and their Ensembles for Imbalanced Data Inproceedings
In: Lozano, Jose A; Gámez, José A; Moreno, José A (Ed.): Advances in Artificial Intelligence: 14th Conference of the Spanish Association for Artificial Intelligence, CAEPIA 2011, pp. 94–103, Springer, La Laguna, Spain, 2011, ISBN: 978-3-642-25273-0.
BibTeX | Tags: Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods
@inproceedings{RDGS11,
  title     = {Using Model Trees and their Ensembles for Imbalanced Data},
  author    = {Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio and Pedro Santos},
  editor    = {Jose A Lozano and José A Gámez and José A Moreno},
  isbn      = {978-3-642-25273-0},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {Advances in Artificial Intelligence: 14th Conference of the Spanish Association for Artificial Intelligence, CAEPIA 2011},
  volume    = {7023},
  pages     = {94--103},
  publisher = {Springer},
  address   = {La Laguna, Spain},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Class-imbalanced problems, Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Díez-Pastor, José Francisco; García-Osorio, César; Rodríguez, Juan José
GRASP Forest for regression: GRASP Metaheuristic Applied to the Construction of Ensembles of Regression Trees Inproceedings
In: CAEPIA 2011, 2011.
BibTeX | Tags: Data Mining, Decision trees, Regression ensembles
@inproceedings{DGR11,
  title     = {GRASP Forest for regression: GRASP Metaheuristic Applied to the Construction of Ensembles of Regression Trees},
  author    = {José Francisco Díez-Pastor and César García-Osorio and Juan José Rodríguez},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {CAEPIA 2011},
  keywords  = {Data Mining, Decision trees, Regression ensembles},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pardo, Carlos; Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Random Oracles for Regression Ensembles Book Chapter
In: Ensembles in Machine Learning Applications, vol. 373, pp. 181-199, 2011, ISSN: 1860-949X.
Links | BibTeX | Tags: Data Mining, Random oracles, Regression ensembles
@incollection{ROforReg,
  title     = {Random Oracles for Regression Ensembles},
  author    = {Carlos Pardo and Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
  doi       = {10.1007/978-3-642-22910-7_11},
  issn      = {1860-949X},
  year      = {2011},
  date      = {2011-01-01},
  booktitle = {Ensembles in Machine Learning Applications},
  volume    = {373},
  pages     = {181--199},
  series    = {Studies in Computational Intelligence},
  keywords  = {Data Mining, Random oracles, Regression ensembles},
  pubstate  = {published},
  tppubtype = {incollection}
}
2010
Fyfe, C; Tino, P; Charles, D; García-Osorio, C; Yin, H (Ed.)
11th International Conference on Intelligent Data Engineering and Automated Learning Proceeding
IDEAL2010 Springer, 2010, ISBN: 978-3-642-15380-8.
BibTeX | Tags: Applied Machine Learning, Business intelligence, Data Mining
@proceedings{fyfe:ideal2010,
  title        = {11th International Conference on Intelligent Data Engineering and Automated Learning},
  editor       = {C Fyfe and P Tino and D Charles and C García-Osorio and H Yin},
  isbn         = {978-3-642-15380-8},
  year         = {2010},
  date         = {2010-09-01},
  publisher    = {Springer},
  organization = {IDEAL2010},
  keywords     = {Applied Machine Learning, Business intelligence, Data Mining},
  pubstate     = {published},
  tppubtype    = {proceedings}
}
García-Osorio, César; de Haro-García, Aida; García-Pedrajas, Nicolás
Democratic instance selection: A linear complexity instance selection algorithm based on classifier ensemble concepts Journal Article
In: Artificial Intelligence, vol. 174, no. 5-6, pp. 410–441, 2010, ISSN: 0004-3702.
Links | BibTeX | Tags: Big data, Data Mining, Instance selection
@article{1746771,
  title     = {Democratic instance selection: A linear complexity instance selection algorithm based on classifier ensemble concepts},
  author    = {César García-Osorio and Aida de Haro-García and Nicolás García-Pedrajas},
  doi       = {10.1016/j.artint.2010.01.001},
  issn      = {0004-3702},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {Artificial Intelligence},
  volume    = {174},
  number    = {5-6},
  pages     = {410--441},
  publisher = {Elsevier Science Publishers Ltd.},
  address   = {Essex, UK},
  keywords  = {Big data, Data Mining, Instance selection},
  pubstate  = {published},
  tppubtype = {article}
}
Rodríguez, Juan José; García-Osorio, César; Maudes, Jesús; Díez-Pastor, José Francisco
An Experimental Study on Ensembles of Functional Trees Inproceedings
In: Gayar, Neamat El; Kittler, Josef; Roli, Fabio (Ed.): 9th International Workshop on Multiple Classifier Systems, MCS 2010, pp. 64-73, Cairo, Egypt, 2010, ISBN: 978-3-642-12126-5.
Links | BibTeX | Tags: Data Mining, Decision trees, Ensemble methods
@inproceedings{RGMD10,
  title     = {An Experimental Study on Ensembles of Functional Trees},
  author    = {Juan José Rodríguez and César García-Osorio and Jesús Maudes and José Francisco Díez-Pastor},
  editor    = {Neamat El Gayar and Josef Kittler and Fabio Roli},
  doi       = {10.1007/978-3-642-12127-2_7},
  isbn      = {978-3-642-12126-5},
  year      = {2010},
  date      = {2010-01-01},
  booktitle = {9th International Workshop on Multiple Classifier Systems, MCS 2010},
  volume    = {5997},
  pages     = {64--73},
  address   = {Cairo, Egypt},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pardo, Carlos; Rodríguez, Juan José; Díez-Pastor, José Francisco; García-Osorio, César
Random Oracles for Regression Ensembles Inproceedings
In: Workshop on Supervised and Unsupervised Ensemble Methods and their Applications, SUEMA 2010, pp. 85-96, 2010.
BibTeX | Tags: Data Mining, Random oracles, Regression ensembles
@inproceedings{PRDG10,
  title     = {Random Oracles for Regression Ensembles},
  author    = {Carlos Pardo and Juan José Rodríguez and José Francisco Díez-Pastor and César García-Osorio},
  year      = {2010},
  date      = {2010-01-01},
  booktitle = {Workshop on Supervised and Unsupervised Ensemble Methods and their Applications, SUEMA 2010},
  pages     = {85--96},
  keywords  = {Data Mining, Random oracles, Regression ensembles},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rodríguez, Juan José; García-Osorio, César; Maudes, Jesús
Forests of Nested Dichotomies Journal Article
In: Pattern Recognition Letters, vol. 31, no. 2, pp. 125-132, 2010, ISSN: 0167-8655.
Links | BibTeX | Tags: Data Mining, Decision trees, Ensemble methods
@article{RGM10,
  title     = {Forests of Nested Dichotomies},
  author    = {Juan José Rodríguez and César García-Osorio and Jesús Maudes},
  doi       = {10.1016/j.patrec.2009.09.015},
  issn      = {0167-8655},
  year      = {2010},
  date      = {2010-01-01},
  journal   = {Pattern Recognition Letters},
  volume    = {31},
  number    = {2},
  pages     = {125--132},
  keywords  = {Data Mining, Decision trees, Ensemble methods},
  pubstate  = {published},
  tppubtype = {article}
}
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César; Pardo, Carlos
Random Projections for SVM Ensembles Book Chapter
In: García-Pedrajas, Nicolás; Herrera, Francisco; Fyfe, Colin; Benítez, José Manuel; Ali, Moonis (Ed.): Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010, vol. 6097, pp. 87–95, Springer, Córdoba, Spain, 2010, ISBN: 978-3-642-13024-3.
Links | BibTeX | Tags: Data Mining, Support vector machines
@inproceedings{MRGP10,
  title     = {Random Projections for SVM Ensembles},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio and Carlos Pardo},
  editor    = {Nicolás García-Pedrajas and Francisco Herrera and Colin Fyfe and José Manuel Benítez and Moonis Ali},
  url       = {http://link.springer.com/chapter/10.1007%2F978-3-642-13025-0_10},
  doi       = {10.1007/978-3-642-13025-0_10},
  isbn      = {978-3-642-13024-3},
  year      = {2010},
  date      = {2010-01-01},
  booktitle = {Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010},
  volume    = {6097},
  pages     = {87--95},
  publisher = {Springer},
  address   = {Córdoba, Spain},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Data Mining, Support vector machines},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pardo, Carlos; Rodríguez, Juan José; García-Osorio, César; Maudes, Jesús
An Empirical Study of Multilayer Perceptron Ensembles for Regression Tasks Inproceedings
In: García-Pedrajas, Nicolás; Herrera, Francisco; Fyfe, Colin; Benítez, José Manuel; Ali, Moonis (Ed.): Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010, pp. 106–115, Springer, Córdoba, Spain, 2010, ISBN: 978-3-642-13024-3.
BibTeX | Tags: Data Mining, Ensemble methods, Neural networks, Regression
@inproceedings{PRGM10,
  title     = {An Empirical Study of Multilayer Perceptron Ensembles for Regression Tasks},
  author    = {Carlos Pardo and Juan José Rodríguez and César García-Osorio and Jesús Maudes},
  editor    = {Nicolás García-Pedrajas and Francisco Herrera and Colin Fyfe and José Manuel Benítez and Moonis Ali},
  isbn      = {978-3-642-13024-3},
  year      = {2010},
  date      = {2010-01-01},
  booktitle = {Trends in Applied Intelligent Systems: 23rd International Conference on Industrial Engineering and Other Applications of Applied Intelligent Systems, IEA/AIE 2010},
  volume    = {6097},
  pages     = {106--115},
  publisher = {Springer},
  address   = {Córdoba, Spain},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Data Mining, Ensemble methods, Neural networks, Regression},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Díez-Pastor, José Francisco; Rodríguez, Juan José; Bustillo, Andrés; García-Osorio, César; Reñones, Aníbal
Aplicación de Bosques de Decisión a la Detección de Roturas en Herramientas Multifilo Inproceedings
In: Troncoso, Alicia; Riquelme, José C (Ed.): V Simposio de Teoría y Aplicaciones de Minería de Datos, TAMIDA 2010, pp. 275-282, Garceta grupo editorial, 2010, ISBN: 978-84-92812-60-8, (Simposio organizado por Red Española de Minería de Datos y Aprendizaje).
BibTeX | Tags: Applied Machine Learning, Business intelligence, Data Mining
@inproceedings{DRBG10,
  title     = {Aplicación de Bosques de Decisión a la Detección de Roturas en Herramientas Multifilo},
  author    = {José Francisco Díez-Pastor and Juan José Rodríguez and Andrés Bustillo and César García-Osorio and Aníbal Reñones},
  editor    = {Alicia Troncoso and José C Riquelme},
  isbn      = {978-84-92812-60-8},
  year      = {2010},
  date      = {2010-01-01},
  booktitle = {V Simposio de Teoría y Aplicaciones de Minería de Datos, TAMIDA 2010},
  pages     = {275--282},
  publisher = {Garceta grupo editorial},
  note      = {Simposio organizado por Red Española de Minería de Datos y Aprendizaje},
  keywords  = {Applied Machine Learning, Business intelligence, Data Mining},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2009
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Disturbing Neighbors Ensembles for Linear SVM Inproceedings
In: Benediktsson, Jon Atli; Kittler, Josef; Roli, Fabio (Ed.): 8th International Workshop on Multiple Classifier Systems, MCS 2009, pp. 191–200, Springer-Verlag, Reykjavik, Iceland, 2009, ISBN: 978-3-642-02325-5.
Links | BibTeX | Tags: Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods, Support vector machines
@inproceedings{MRG09a,
  title     = {Disturbing Neighbors Ensembles for Linear SVM},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio},
  editor    = {Jon Atli Benediktsson and Josef Kittler and Fabio Roli},
  doi       = {10.1007/978-3-642-02326-2_20},
  isbn      = {978-3-642-02325-5},
  year      = {2009},
  date      = {2009-01-01},
  booktitle = {8th International Workshop on Multiple Classifier Systems, MCS 2009},
  volume    = {5519},
  pages     = {191--200},
  publisher = {Springer-Verlag},
  address   = {Reykjavik, Iceland},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods, Support vector machines},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rodríguez, Juan José; Maudes, Jesús; Pardo, Carlos; García-Osorio, César
Disturbing Neighbors Ensembles for Regression Inproceedings
In: XIII Conferencia de la Asociación Española para la Inteligencia Artificial, CAEPIA - TTIA 2009, pp. 369-378, Asociación Española para la Inteligencia Artificial, Sevilla, Spain, 2009, ISBN: 978-84-692-6424-9.
BibTeX | Tags: Data Mining, Disturbing neighbors, Regression, Regression ensembles
@inproceedings{RMPG09,
  title     = {Disturbing Neighbors Ensembles for Regression},
  author    = {Juan José Rodríguez and Jesús Maudes and Carlos Pardo and César García-Osorio},
  isbn      = {978-84-692-6424-9},
  year      = {2009},
  date      = {2009-01-01},
  booktitle = {XIII Conferencia de la Asociación Española para la Inteligencia Artificial, CAEPIA - TTIA 2009},
  pages     = {369--378},
  publisher = {Asociación Española para la Inteligencia Artificial},
  address   = {Sevilla, Spain},
  keywords  = {Data Mining, Disturbing neighbors, Regression, Regression ensembles},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2008
García-Osorio, César; Díez-Pastor, José Francisco; Maudes, Jesús; Rodríguez, Juan José
LICENSE PLATE NUMBER RECOGNITION --- New heuristics and a comparative study of classifiers Inproceedings
In: Filipe, Joaquim; Andrade-Cetto, Juan; Ferrier, Jean-Louis (Ed.): 5th International Conference on Informatics in Control, Automation and Robotics (ICINCO 2008), pp. 268-273, Universidade da Madeira, Funchal, Madeira, Portugal, 2008, ISBN: 978-989-8111-31-9.
BibTeX | Tags: Data Mining, License plate number recognition
@inproceedings{ICINCO08,
  title     = {LICENSE PLATE NUMBER RECOGNITION --- New heuristics and a comparative study of classifiers},
  author    = {César García-Osorio and José Francisco Díez-Pastor and Jesús Maudes and Juan José Rodríguez},
  editor    = {Joaquim Filipe and Juan Andrade-Cetto and Jean-Louis Ferrier},
  isbn      = {978-989-8111-31-9},
  year      = {2008},
  date      = {2008-05-01},
  booktitle = {5th International Conference on Informatics in Control, Automation and Robotics (ICINCO 2008)},
  pages     = {268--273},
  publisher = {Universidade da Madeira},
  address   = {Funchal, Madeira, Portugal},
  keywords  = {Data Mining, License plate number recognition},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
García-Osorio, César; García-Pedrajas, Nicolás
Constructing ensembles of classifiers using linear projections based on misclassified instances Inproceedings
In: Verleysen, Michel (Ed.): 16th European Symposium on Artificial Neural Networks (ESANN 2008), pp. 283–288, d-side publications, Bruges, Belgium, 2008, ISBN: 2-930307-08-0.
BibTeX | Tags: Classifier ensembles, Data Mining, Ensemble methods, Linear projections
@inproceedings{ESANN08,
  title     = {Constructing ensembles of classifiers using linear projections based on misclassified instances},
  author    = {César García-Osorio and Nicolás García-Pedrajas},
  editor    = {Michel Verleysen},
  isbn      = {2-930307-08-0},
  year      = {2008},
  date      = {2008-04-01},
  booktitle = {16th European Symposium on Artificial Neural Networks (ESANN 2008)},
  pages     = {283--288},
  publisher = {d-side publications},
  address   = {Bruges, Belgium},
  keywords  = {Classifier ensembles, Data Mining, Ensemble methods, Linear projections},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Maudes-Raedo, Jesús; Rodríguez, Juan José; García-Osorio, César
Disturbing Neighbors Diversity for Decision Forest Inproceedings
In: Valentini, Giorgio; Okun, Oleg (Ed.): Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA 2008), pp. 67–71, Patras, Greece, 2008, ISBN: 978-84-612-4475-1.
BibTeX | Tags: Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods
@inproceedings{SUEMA2008:DisturbingNeighbors,
  title     = {Disturbing Neighbors Diversity for Decision Forest},
  author    = {Jesús Maudes-Raedo and Juan José Rodríguez and César García-Osorio},
  editor    = {Giorgio Valentini and Oleg Okun},
  isbn      = {978-84-612-4475-1},
  year      = {2008},
  date      = {2008},
  booktitle = {Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA 2008)},
  pages     = {67--71},
  address   = {Patras, Greece},
  keywords  = {Classifier ensembles, Data Mining, Decision trees, Disturbing neighbors, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2007
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Cascading with VDM and Binary Decision Trees for Nominal Data Inproceedings
In: Okun, Oleg; Valentini, Giorgio (Ed.): Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA'2007), pp. 28–42, Gerona, 2007.
BibTeX | Tags: Cascading, Data Mining, Ensemble methods
@inproceedings{MRG07b,
  title     = {Cascading with VDM and Binary Decision Trees for Nominal Data},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio},
  editor    = {Oleg Okun and Giorgio Valentini},
  year      = {2007},
  date      = {2007-06-01},
  booktitle = {Workshop on Supervised and Unsupervised Ensemble Methods and Their Applications (SUEMA'2007)},
  pages     = {28--42},
  address   = {Gerona},
  keywords  = {Cascading, Data Mining, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
García-Pedrajas, Nicolás; García-Osorio, César; Fyfe, Colin
Nonlinear ``boosting'' projections for ensemble construction Journal Article
In: Journal of Machine Learning Research, vol. 8, pp. 1–33, 2007, ISSN: 1532-4435.
Abstract | Links | BibTeX | Tags: Boosting, Classifier ensembles, Data Mining, Ensemble methods, Neural networks, Nonlinear projections
@article{cgosorio07boosting,
  title     = {Nonlinear ``boosting'' projections for ensemble construction},
  author    = {Nicolás García-Pedrajas and César García-Osorio and Colin Fyfe},
  url       = {http://jmlr.csail.mit.edu/papers/volume8/garcia-pedrajas07a/garcia-pedrajas07a.pdf},
  issn      = {1532-4435},
  year      = {2007},
  date      = {2007-01-01},
  journal   = {Journal of Machine Learning Research},
  volume    = {8},
  pages     = {1--33},
  abstract  = {In this paper we propose a novel approach for ensemble construction based on the use of nonlinear projections to achieve both accuracy and diversity of individual classifiers. The proposed approach combines the philosophy of boosting, putting more effort on difficult instances, with the basis of the random subspace method. Our main contribution is that instead of using a random subspace, we construct a projection taking into account the instances which have posed most difficulties to previous classifiers. In this way, consecutive nonlinear projections are created by a neural network trained using only incorrectly classified instances. The feature subspace induced by the hidden layer of this network is used as the input space to a new classifier. The method is compared with bagging and boosting techniques, showing an improved performance on a large set of 44 problems from the UCI Machine Learning Repository. An additional study showed that the proposed approach is less sensitive to noise in the data than boosting methods.},
  keywords  = {Boosting, Classifier ensembles, Data Mining, Ensemble methods, Neural networks, Nonlinear projections},
  pubstate  = {published},
  tppubtype = {article}
}
In this paper we propose a novel approach for ensemble construction based on the use of nonlinear
projections to achieve both accuracy and diversity of individual classifiers. The proposed approach
combines the philosophy of boosting, putting more effort on difficult instances, with the basis of
the random subspace method. Our main contribution is that instead of using a random subspace,
we construct a projection taking into account the instances which have posed most difficulties to
previous classifiers. In this way, consecutive nonlinear projections are created by a neural network
trained using only incorrectly classified instances. The feature subspace induced by the hidden layer
of this network is used as the input space to a new classifier. The method is compared with bagging
and boosting techniques, showing an improved performance on a large set of 44 problems from the
UCI Machine Learning Repository. An additional study showed that the proposed approach is less
sensitive to noise in the data than boosting methods.
Maudes, Jesús; Rodríguez, Juan José; García-Osorio, César
Cascading for Nominal Data Inproceedings
In: 7th International Workshop, MCS 2007, pp. 231–240, Springer-Verlag, Prague, Czech Republic, 2007, ISSN: 0302-9743.
Links | BibTeX | Tags: Cascading, Data Mining, Ensemble methods
@inproceedings{CascadingLNCS2007,
  title     = {Cascading for Nominal Data},
  author    = {Jesús Maudes and Juan José Rodríguez and César García-Osorio},
  doi       = {10.1007/978-3-540-72523-7_24},
  issn      = {0302-9743},
  year      = {2007},
  date      = {2007-01-01},
  booktitle = {7th International Workshop on Multiple Classifier Systems, MCS 2007},
  volume    = {4472},
  pages     = {231--240},
  publisher = {Springer-Verlag},
  address   = {Prague, Czech Republic},
  series    = {Lecture Notes in Computer Science},
  keywords  = {Cascading, Data Mining, Ensemble methods},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
García-Osorio, César; Fyfe, Colin
Successes and New Directions in Data Mining Book Chapter
In: Chapter Visualizing Multi Dimensional Data, pp. 236–276, Idea Group Inc., 2007, ISBN: 1599046458.
Links | BibTeX | Tags: Andrews curves, Data Mining, Data visualization, Exploratory data analysis
@inbook{DataMiningSucesses2007,
  title     = {Successes and New Directions in Data Mining},
  author    = {César García-Osorio and Colin Fyfe},
  url       = {http://books.google.es/books?id=VCZrtbjTRp8C&pg=PA147&ots=Ap4Si7ucUU&dq=Success+and+New+Directions+in+Data+Mining&sig=-v34qqba2rihMBHTnyxGGLX-GaI#PPP1,M1},
  doi       = {10.3217/jucs-011-11-1806},
  isbn      = {1599046458},
  year      = {2007},
  date      = {2007-01-01},
  pages     = {236--276},
  publisher = {Idea Group Inc.},
  chapter   = {Visualizing Multi Dimensional Data},
  keywords  = {Andrews curves, Data Mining, Data visualization, Exploratory data analysis},
  pubstate  = {published},
  tppubtype = {inbook}
}
García-Osorio, César; Benito-Esteban, J L; Maudes, Jesús; Rodríguez, Juan José
Adding Interactivity to Andrews Curves and Extensions Inproceedings
In: Villanueva, J J (Ed.): Visualization, Imaging, and Image Processing (VIIP 2007), pp. 118–122, ACTA Press, Palma de Mallorca, 2007, ISBN: 978-0-88986-691-1.
Links | BibTeX | Tags: Andrews curves, Data Mining, Data visualization, Exploratory data analysis
@inproceedings{VIIP2007,
  title     = {Adding Interactivity to Andrews Curves and Extensions},
  author    = {César García-Osorio and J L Benito-Esteban and Jesús Maudes and Juan José Rodríguez},
  editor    = {J J Villanueva},
  url       = {http://www.actapress.com/Abstract.aspx?paperId=31544},
  isbn      = {978-0-88986-691-1},
  year      = {2007},
  date      = {2007},
  booktitle = {Visualization, Imaging, and Image Processing (VIIP 2007)},
  pages     = {118--122},
  publisher = {ACTA Press},
  address   = {Palma de Mallorca},
  keywords  = {Andrews curves, Data Mining, Data visualization, Exploratory data analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2005
García-Osorio, César; Fyfe, Colin
Regaining Sparsity in Kernel Principal Components Journal Article
In: Neurocomputing, vol. 67, pp. 398–402, 2005, ISSN: 0925-2312.
Abstract | BibTeX | Tags: Data Mining, Kernel methods, Kernel Principal Components
@article{cgosorio04skpca,
  title     = {Regaining Sparsity in Kernel Principal Components},
  author    = {César García-Osorio and Colin Fyfe},
  issn      = {0925-2312},
  year      = {2005},
  date      = {2005-08-01},
  journal   = {Neurocomputing},
  volume    = {67},
  pages     = {398--402},
  abstract  = {Support Vector Machines are supervised regression and classification machines which have the nice property of automatically identifying which of the data points are most important in creating the machine. Kernel Principal Component Analysis (KPCA) is a related technique in that it also relies on linear operations in a feature space but does not have this ability to identify important points. Sparse KPCA goes too far in that it identifies a single data point as most important. We show how, by bagging the data, we may create a compromise which gives us a sparse but not grandmother representation for KPCA.},
  keywords  = {Data Mining, Kernel methods, Kernel Principal Components},
  pubstate  = {published},
  tppubtype = {article}
}
García-Osorio, César
Data Mining and Visualization PhD Thesis
School of Computing, Paisley University, 2005.
BibTeX | Tags: Andrews curves, Data Mining, Data visualization, Neural networks, Self organizing maps
@phdthesis{cgosorio05thesis,
  title     = {Data Mining and Visualization},
  author    = {César García-Osorio},
  year      = {2005},
  date      = {2005-05-01},
  address   = {High Street, Paisley, Strathclyde, PA1 2BE, United Kingdom},
  school    = {School of Computing, Paisley University},
  keywords  = {Andrews curves, Data Mining, Data visualization, Neural networks, Self organizing maps},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
García-Osorio, César; Fyfe, Colin
The Combined Use of Self-organizing Maps and Andrews' Curves Journal Article
In: International Journal of Neural Systems, vol. 15, no. 3, pp. 1-10, 2005, ISSN: 0129-0657.
Links | BibTeX | Tags: Andrews curves, Data Mining, Data visualization, Neural networks, Self organizing maps
@article{cgosorio05AndrewsSOM,
  title     = {The Combined Use of Self-organizing Maps and Andrews' Curves},
  author    = {César García-Osorio and Colin Fyfe},
  doi       = {10.1142/S0129065705000207},
  issn      = {0129-0657},
  year      = {2005},
  date      = {2005-01-01},
  journal   = {International Journal of Neural Systems},
  volume    = {15},
  number    = {3},
  pages     = {1--10},
  keywords  = {Andrews curves, Data Mining, Data visualization, Neural networks, Self organizing maps},
  pubstate  = {published},
  tppubtype = {article}
}