2016
Arnaiz-González, Álvar; Díez-Pastor, José Francisco; Rodríguez, Juan José; García-Osorio, César
Instance selection for regression: Adapting DROP (Journal Article)
In: Neurocomputing, vol. 201, pp. 66–81, 2016, ISSN: 0925-2312.
Abstract | Links | BibTeX | Tags: Data Mining, DROP, Instance selection, Noise filtering, Regression
@article{ArnaizGonzález2016,
  title     = {Instance selection for regression: Adapting {DROP}},
  author    = {Arnaiz-González, Álvar and Díez-Pastor, José Francisco and Rodríguez, Juan José and García-Osorio, César},
  url       = {http://www.sciencedirect.com/science/article/pii/S0925231216301953},
  doi       = {10.1016/j.neucom.2016.04.003},
  issn      = {0925-2312},
  year      = {2016},
  date      = {2016-01-01},
  journal   = {Neurocomputing},
  volume    = {201},
  pages     = {66--81},
  abstract  = {Machine Learning has two central processes of interest that captivate the scientific community: classification and regression. Although instance selection for classification has shown its usefulness and has been researched in depth, instance selection for regression has not followed the same path and there are few published algorithms on the subject. In this paper, we propose that various adaptations of DROP, a well-known family of instance selection methods for classification, be applied to regression. Their behaviour is analysed using a broad range of datasets. The results are presented of the analysis of four new proposals for the reduction of dataset size, the effect on error when several classifiers are trained with the reduced dataset, and their robustness against noise. This last aspect is especially important, since in real life, it is frequent that the registered data be inexact and present distortions due to different causes: errors in the measurement tools, typos when writing results, existence of outliers and spurious readings, corruption in files, etc. When the datasets are small it is possible to manually correct these problems, but for big and huge datasets is better to have automatic methods to deal with these problems. In the experimental part, the proposed methods are found to be quite robust to noise.},
  keywords  = {Data Mining, DROP, Instance selection, Noise filtering, Regression},
  pubstate  = {published},
  tppubtype = {article}
}
Machine Learning has two central processes of interest that captivate the scientific community: classification and regression. Although instance selection for classification has shown its usefulness and has been researched in depth, instance selection for regression has not followed the same path and there are few published algorithms on the subject. In this paper, we propose that various adaptations of DROP, a well-known family of instance selection methods for classification, be applied to regression. Their behaviour is analysed using a broad range of datasets. The results are presented of the analysis of four new proposals for the reduction of dataset size, the effect on error when several classifiers are trained with the reduced dataset, and their robustness against noise. This last aspect is especially important, since in real life, it is frequent that the registered data be inexact and present distortions due to different causes: errors in the measurement tools, typos when writing results, existence of outliers and spurious readings, corruption in files, etc. When the datasets are small it is possible to manually correct these problems, but for big and huge datasets is better to have automatic methods to deal with these problems. In the experimental part, the proposed methods are found to be quite robust to noise.