2024
Garrido-Labrador, José Luis; Serrano-Mamolar, Ana; Maudes-Raedo, Jesús; Rodríguez, Juan José; García-Osorio, César
Ensemble methods and semi-supervised learning for information fusion: A review and future research directions Journal Article
In: Information Fusion, vol. 107, 2024.
Abstract | Links | BibTeX | Tags: Bibliographic review, Ensemble learning, Experimental protocol, Information fusion, Label scarcity, Research trends, Semi-supervised ensemble classification, Semi-supervised learning
@article{garrido2024ensemble,
  title     = {Ensemble methods and semi-supervised learning for information fusion: A review and future research directions},
  author    = {Garrido-Labrador, José Luis and Serrano-Mamolar, Ana and Maudes-Raedo, Jesús and Rodríguez, Juan José and García-Osorio, César},
  url       = {https://doi.org/10.1016/j.inffus.2024.102310},
  doi       = {10.1016/j.inffus.2024.102310},
  year      = {2024},
  date      = {2024-02-02},
  urldate   = {2024-02-02},
  journal   = {Information Fusion},
  volume    = {107},
  abstract  = {Advances over the past decade at the intersection of information fusion methods and Semi-Supervised Learning (SSL) are investigated in this paper that grapple with challenges related to limited labelled data. To do so, a bibliographic review of papers published since 2013 is presented, in which ensemble methods are combined with new machine learning algorithms. A total of 128 new proposals using SSL algorithms for ensemble construction are identified and classified. All the methods are categorised by approach, ensemble type, and base classifier. Experimental protocols, pre-processing, dataset usage, unlabelled ratios, and statistical tests are also assessed, underlining the major trends, and some shortcomings of particular studies. It is evident from this literature review that foundational algorithms such as self-training and co-training are influencing current developments, and that innovative ensemble …},
  keywords  = {Bibliographic review, Ensemble learning, Experimental protocol, Information fusion, Label scarcity, Research trends, Semi-supervised ensemble classification, Semi-supervised learning},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Kuncheva, Ludmila I; Rodríguez, Juan José
On feature selection protocols for very low-sample-size data Journal Article
In: Pattern Recognition, vol. 81, pp. 660-673, 2018, ISSN: 0031-3203.
Abstract | Links | BibTeX | Tags: Cross-validation, Experimental protocol, Feature selection, SELECTED, Training/testing, Wide datasets
@article{Kuncheva2018b,
  title     = {On feature selection protocols for very low-sample-size data},
  author    = {Kuncheva, Ludmila I. and Rodríguez, Juan José},
  url       = {https://www.sciencedirect.com/science/article/pii/S003132031830102X},
  doi       = {10.1016/j.patcog.2018.03.012},
  issn      = {0031-3203},
  year      = {2018},
  date      = {2018-09-01},
  journal   = {Pattern Recognition},
  volume    = {81},
  pages     = {660--673},
  abstract  = {High-dimensional data with very few instances are typical in many application domains. Selecting a highly discriminative subset of the original features is often the main interest of the end user. The widely-used feature selection protocol for such type of data consists of two steps. First, features are selected from the data (possibly through cross-validation), and, second, a cross-validation protocol is applied to test a classifier using the selected features. The selected feature set and the testing accuracy are then returned to the user. For the lack of a better option, the same low-sample-size dataset is used in both steps. Questioning the validity of this protocol, we carried out an experiment using 24 high-dimensional datasets, three feature selection methods and five classifier models. We found that the accuracy returned by the above protocol is heavily biased, and therefore propose an alternative protocol which avoids the contamination by including both steps in a single cross-validation loop. Statistical tests verify that the classification accuracy returned by the proper protocol is significantly closer to the true accuracy (estimated from an independent testing set) compared to that returned by the currently favoured protocol.},
  keywords  = {Cross-validation, Experimental protocol, Feature selection, SELECTED, Training/testing, Wide datasets},
  pubstate  = {published},
  tppubtype = {article}
}