2024
Garrido-Labrador, José Luis; Serrano-Mamolar, Ana; Maudes-Raedo, Jesús; Rodríguez, Juan J.; García-Osorio, César
Ensemble methods and semi-supervised learning for information fusion: A review and future research directions Journal Article
In: Information Fusion, vol. 107, 2024, ISSN: 1566-2535.
Links | BibTeX | Tags: p_humanaid, PID2020-119894GB-I00, SSL
@article{Garrido-Labrador2024,
title = {Ensemble methods and semi-supervised learning for information fusion: A review and future research directions},
author = {José Luis Garrido-Labrador and Ana Serrano-Mamolar and Jesús Maudes-Raedo and Juan J. Rodríguez and César García-Osorio},
doi = {10.1016/j.inffus.2024.102310},
issn = {1566-2535},
year = {2024},
date = {2024-07},
urldate = {2024-07},
journal = {Information Fusion},
volume = {107},
publisher = {Elsevier BV},
keywords = {p_humanaid, PID2020-119894GB-I00, SSL},
pubstate = {published},
tppubtype = {article}
}
Kuncheva, Ludmila I.; Garrido-Labrador, José Luis; Ramos-Pérez, Ismael; Hennessey, Samuel L.; Rodríguez, Juan J.
Semi-supervised classification with pairwise constraints: A case study on animal identification from video Journal Article
In: Information Fusion, vol. 104, 2024, ISSN: 1566-2535.
Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Kuncheva2024,
title = {Semi-supervised classification with pairwise constraints: A case study on animal identification from video},
author = {Ludmila I. Kuncheva and José Luis Garrido-Labrador and Ismael Ramos-Pérez and Samuel L. Hennessey and Juan J. Rodríguez},
doi = {10.1016/j.inffus.2023.102188},
issn = {1566-2535},
year = {2024},
date = {2024-04},
urldate = {2024-04},
journal = {Information Fusion},
volume = {104},
publisher = {Elsevier BV},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
Ramos-Pérez, Ismael; Barbero-Aparicio, José Antonio; Canepa-Oneto, Antonio; Arnaiz-González, Álvar; Maudes-Raedo, Jesús
An Extensive Performance Comparison between Feature Reduction and Feature Selection Preprocessing Algorithms on Imbalanced Wide Data Journal Article
In: Information, vol. 15, no. 4, 2024, ISSN: 2078-2489.
Abstract | Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Ramos-Pérez2024,
title = {An Extensive Performance Comparison between Feature Reduction and Feature Selection Preprocessing Algorithms on Imbalanced Wide Data},
author = {Ismael Ramos-Pérez and José Antonio Barbero-Aparicio and Antonio Canepa-Oneto and Álvar Arnaiz-González and Jesús Maudes-Raedo},
doi = {10.3390/info15040223},
issn = {2078-2489},
year = {2024},
date = {2024-04},
urldate = {2024-04},
journal = {Information},
volume = {15},
number = {4},
publisher = {MDPI AG},
abstract = {The most common preprocessing techniques used to deal with datasets having high dimensionality and a low number of instances—or wide data—are feature reduction (FR), feature selection (FS), and resampling. This study explores the use of FR and resampling techniques, expanding the limited comparisons between FR and filter FS methods in the existing literature, especially in the context of wide data. We compare the optimal outcomes from a previous comprehensive study of FS against new experiments conducted using FR methods. Two specific challenges associated with the use of FR are outlined in detail: finding FR methods that are compatible with wide data and the need for a reduction estimator of nonlinear approaches to process out-of-sample data. The experimental study compares 17 techniques, including supervised, unsupervised, linear, and nonlinear approaches, using 7 resampling strategies and 5 classifiers. The results demonstrate which configurations are optimal, according to their performance and computation time. Moreover, the best configuration—namely, k Nearest Neighbor (KNN) + the Maximal Margin Criterion (MMC) feature reducer with no resampling—is shown to outperform state-of-the-art algorithms.},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
Maestro-Prieto, Jose Alberto; Ramírez-Sanz, José Miguel; Bustillo, Andrés; Rodriguez-Díez, Juan José
Semi-supervised diagnosis of wind-turbine gearbox misalignment and imbalance faults Journal Article
In: Applied Intelligence, 2024, ISSN: 1573-7497.
Abstract | Links | BibTeX | Tags: Bearing failures, Fault detection and diagnosis, PID2020-119894GB-I00, Powertrain failures, Semi-supervised learning, SSL, Wind turbine
@article{Maestro-Prieto2024,
title = {Semi-supervised diagnosis of wind-turbine gearbox misalignment and imbalance faults},
author = {Jose Alberto Maestro-Prieto and José Miguel Ramírez-Sanz and Andrés Bustillo and Juan José Rodriguez-Díez},
url = {https://doi.org/10.1007/s10489-024-05373-6},
doi = {10.1007/s10489-024-05373-6},
issn = {1573-7497},
year = {2024},
date = {2024-03-28},
urldate = {2024-03-28},
journal = {Applied Intelligence},
abstract = {Both wear-induced bearing failure and misalignment of the powertrain between the rotor and the electrical generator are common failure modes in wind-turbine motors. In this study, Semi-Supervised Learning (SSL) is applied to a fault detection and diagnosis solution. Firstly, a dataset is generated containing both normal operating patterns and seven different failure classes of the two aforementioned failure modes that vary in intensity. Several datasets are then generated, maintaining different numbers of labeled instances and unlabeling the others, in order to evaluate the number of labeled instances needed for the desired accuracy level. Subsequently, different types of SSL algorithms and combinations of algorithms are trained and then evaluated with the test data. The results showed that an SSL approach could improve the accuracy of trained classifiers when a small number of labeled instances were used together with many unlabeled instances to train a Co-Training algorithm or combinations of such algorithms. When a few labeled instances (fewer than 10% or 327 instances, in this case) were used together with unlabeled instances, the SSL algorithms outperformed the result obtained with the Supervised Learning (SL) techniques used as a benchmark. When the number of labeled instances was sufficient, the SL algorithm (using only labeled instances) performed better than the SSL algorithms (accuracy levels of 87.04% vs. 86.45%, when labeling 10% of instances). A competitive accuracy of 97.73% was achieved with the SL algorithm processing a subset of 40% of the labeled instances.},
keywords = {Bearing failures, Fault detection and diagnosis, PID2020-119894GB-I00, Powertrain failures, Semi-supervised learning, SSL, Wind turbine},
pubstate = {published},
tppubtype = {article}
}
Martin-Melero, Íñigo; Serrano-Mamolar, Ana; Rodríguez-Diez, Juan J.
Evaluation of Semi-Supervised Machine Learning applied to Affective State Detection Bachelor Thesis
2024.
Links | BibTeX | Tags: p_humanaid, PID2020-119894GB-I00, Semi-supervised learning, SSL
@comment{NOTE(review): entry type changed from the non-standard bachelorthesis because the DOI and the IEEE publisher point to the PerCom Workshops 2024 proceedings. The booktitle is inferred from the DOI prefix; confirm it against the publisher record.}
@inproceedings{Martin-Melero2024,
title = {Evaluation of Semi-Supervised Machine Learning applied to Affective State Detection},
author = {Íñigo Martin-Melero and Ana Serrano-Mamolar and Juan J. Rodríguez-Diez},
booktitle = {2024 {IEEE} International Conference on Pervasive Computing and Communications Workshops and other Affiliated Events ({PerCom} Workshops)},
doi = {10.1109/percomworkshops59983.2024.10502901},
year = {2024},
date = {2024-03-11},
urldate = {2024-03-11},
publisher = {IEEE},
keywords = {p_humanaid, PID2020-119894GB-I00, Semi-supervised learning, SSL},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Labrador, José Luis; Serrano-Mamolar, Ana; Maudes-Raedo, Jesús; Rodríguez, Juan José; García-Osorio, César
Ensemble methods and semi-supervised learning for information fusion: A review and future research directions Journal Article
In: Information Fusion, vol. 107, 2024.
Abstract | Links | BibTeX | Tags: Bibliographic review, Ensemble learning, Experimental protocol, Information fusion, Label scarcity, PID2020-119894GB-I00, Research trends, Semi-supervised ensemble classification, Semi-supervised learning, SSL
@article{garrido2024ensemble,
title = {Ensemble methods and semi-supervised learning for information fusion: A review and future research directions},
author = {José Luis Garrido-Labrador and Ana Serrano-Mamolar and Jesús Maudes-Raedo and Juan José Rodríguez and César García-Osorio},
url = {https://doi.org/10.1016/j.inffus.2024.102310},
doi = {10.1016/j.inffus.2024.102310},
issn = {1566-2535},
year = {2024},
date = {2024-02-02},
urldate = {2024-02-02},
journal = {Information Fusion},
volume = {107},
abstract = {Advances over the past decade at the intersection of information fusion methods and Semi-Supervised Learning (SSL) are investigated in this paper that grapple with challenges related to limited labelled data. To do so, a bibliographic review of papers published since 2013 is presented, in which ensemble methods are combined with new machine learning algorithms. A total of 128 new proposals using SSL algorithms for ensemble construction are identified and classified. All the methods are categorised by approach, ensemble type, and base classifier. Experimental protocols, pre-processing, dataset usage, unlabelled ratios, and statistical tests are also assessed, underlining the major trends, and some shortcomings of particular studies. It is evident from this literature review that foundational algorithms such as self-training and co-training are influencing current developments, and that innovative ensemble …},
keywords = {Bibliographic review, Ensemble learning, Experimental protocol, Information fusion, Label scarcity, PID2020-119894GB-I00, Research trends, Semi-supervised ensemble classification, Semi-supervised learning, SSL},
pubstate = {published},
tppubtype = {article}
}
Barbero-Aparicio, José A.; Olivares-Gil, Alicia; Rodríguez, Juan J.; García-Osorio, César; Díez-Pastor, José F.
Addressing data scarcity in protein fitness landscape analysis: A study on semi-supervised and deep transfer learning techniques Journal Article
In: Information Fusion, vol. 102, pp. 102035, 2024, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: bioinformatics, Machine learning, PID2020-119894GB-I00, Protein fitness prediction, Semi-supervised learning, Small datasets, SSL, Transfer learning
@article{barbero-aparicio2023b,
  author     = {José A. Barbero-Aparicio and Alicia Olivares-Gil and Juan J. Rodríguez and César García-Osorio and José F. Díez-Pastor},
  title      = {Addressing data scarcity in protein fitness landscape analysis: A study on semi-supervised and deep transfer learning techniques},
  journal    = {Information Fusion},
  volume     = {102},
  pages      = {102035},
  year       = {2024},
  date       = {2024-01-01},
  urldate    = {2024-01-01},
  issn       = {1566-2535},
  doi        = {10.1016/j.inffus.2023.102035},
  url        = {https://www.sciencedirect.com/science/article/pii/S1566253523003512},
  abstract   = {This paper presents a comprehensive analysis of deep transfer learning methods, supervised methods, and semi-supervised methods in the context of protein fitness prediction, with a focus on small datasets. The analysis includes the exploration of the combination of different data sources to enhance the performance of the models. While deep learning and deep transfer learning methods have shown remarkable performance
in situations with abundant data, this study aims to address the more realistic scenario faced by wet lab researchers, where labeled data is often limited. The novelty of this work lies in its examination of deep transfer learning in the context of small datasets and its consideration of semi-supervised methods and multi-view strategies. While previous research has extensively explored deep transfer learning in large dataset scenarios, little attention has been given to its efficacy in small dataset settings or its comparison with semi-supervised approaches. Our findings suggest that deep transfer learning, exemplified by ProteinBERT, shows promising performance in this context compared to the rest of the methods across various evaluation metrics, not only in small dataset contexts but also in large dataset scenarios. This highlights the robustness and versatility of deep transfer learning in protein fitness prediction tasks, even with limited labeled data. The results of this study shed light on the potential of deep transfer learning as a state-of-the-art approach in the field of protein fitness prediction. By leveraging pre-trained models and fine-tuning them on small datasets, researchers can achieve competitive performance surpassing traditional supervised and semi-supervised methods. These findings provide valuable insights for wet lab researchers who face the challenge of limited labeled data, enabling them to make informed decisions when selecting the most effective methodology for their specific protein fitness prediction tasks. Additionally, the study investigated the combination of two different sources of information (encodings) through our enhanced semi-supervised methods, yielding noteworthy results improving their base model and providing valuable insights for further research. 
The presented analysis contributes to a better understanding of the capabilities and limitations of different learning approaches in small dataset scenarios, ultimately aiding in the development of improved protein fitness prediction methods},
  keywords   = {bioinformatics, Machine learning, PID2020-119894GB-I00, Protein fitness prediction, Semi-supervised learning, Small datasets, SSL, Transfer learning},
  pubstate   = {published},
  tppubtype  = {article}
}
This paper presents a comprehensive analysis of deep transfer learning methods, supervised methods, and semi-supervised methods in the context of protein fitness prediction, with a focus on small datasets. The analysis includes the exploration of the combination of different data sources to enhance the performance of the models. While deep learning and deep transfer learning methods have shown remarkable performance in situations with abundant data, this study aims to address the more realistic scenario faced by wet lab researchers, where labeled data is often limited. The novelty of this work lies in its examination of deep transfer learning in the context of small datasets and its consideration of semi-supervised methods and multi-view strategies. While previous research has extensively explored deep transfer learning in large dataset scenarios, little attention has been given to its efficacy in small dataset settings or its comparison with semi-supervised approaches. Our findings suggest that deep transfer learning, exemplified by ProteinBERT, shows promising performance in this context compared to the rest of the methods across various evaluation metrics, not only in small dataset contexts but also in large dataset scenarios. This highlights the robustness and versatility of deep transfer learning in protein fitness prediction tasks, even with limited labeled data. The results of this study shed light on the potential of deep transfer learning as a state-of-the-art approach in the field of protein fitness prediction. By leveraging pre-trained models and fine-tuning them on small datasets, researchers can achieve competitive performance surpassing traditional supervised and semi-supervised methods. These findings provide valuable insights for wet lab researchers who face the challenge of limited labeled data, enabling them to make informed decisions when selecting the most effective methodology for their specific protein fitness prediction tasks. Additionally, the study investigated the combination of two different sources of information (encodings) through our enhanced semi-supervised methods, yielding noteworthy results improving their base model and providing valuable insights for further research.
The presented analysis contributes to a better understanding of the capabilities and limitations of different learning approaches in small dataset scenarios, ultimately aiding in the development of improved protein fitness prediction methods.
2023
Ramírez-Sanz, José Miguel; Maestro-Prieto, Jose-Alberto; Arnaiz-González, Álvar; Bustillo, Andrés
Semi-supervised learning for industrial fault detection and diagnosis: A systemic review Journal Article
In: ISA Transactions, vol. 143, pp. 255–270, 2023, ISSN: 0019-0578.
Links | BibTeX | Tags: p_humanaid, PID2020-119894GB-I00
@article{Ramírez-Sanz2023e,
title = {Semi-supervised learning for industrial fault detection and diagnosis: A systemic review},
author = {José Miguel Ramírez-Sanz and Jose-Alberto Maestro-Prieto and Álvar Arnaiz-González and Andrés Bustillo},
doi = {10.1016/j.isatra.2023.09.027},
issn = {0019-0578},
year = {2023},
date = {2023-12},
urldate = {2023-12},
journal = {ISA Transactions},
volume = {143},
pages = {255--270},
publisher = {Elsevier BV},
keywords = {p_humanaid, PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
Mena-Alonso, Álvaro; Latorre-Carmona, Pedro; González, Dorys C.; Díez-Pastor, José F.; Rodríguez, Juan J.; Mínguez, Jesús; Vicente, Miguel A.
A cost-effective stereo camera-based system for measuring crack propagation in fibre-reinforced concrete Journal Article
In: Archives of Civil and Mechanical Engineering, vol. 23, no. 3, 2023, ISSN: 2083-3318.
Abstract | Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Mena-Alonso2023,
title = {A cost-effective stereo camera-based system for measuring crack propagation in fibre-reinforced concrete},
author = {Álvaro Mena-Alonso and Pedro Latorre-Carmona and Dorys C. González and José F. Díez-Pastor and Juan J. Rodríguez and Jesús Mínguez and Miguel A. Vicente},
doi = {10.1007/s43452-023-00723-6},
issn = {2083-3318},
year = {2023},
date = {2023-08},
urldate = {2023-08},
journal = {Archives of Civil and Mechanical Engineering},
volume = {23},
number = {3},
publisher = {Springer Science and Business Media LLC},
abstract = {This paper shows a new low-cost technology for the measurement of crack propagation in quasi-fragile materials based on a stereo pair of cameras and LED light spots. The two cameras record the displacement experienced by a series of LED white lights. For each frame, the X, Y and Z 3D coordinates of all the centroids of the LED points are obtained. From this information, it is possible to determine the variation of the distance between any two of them. In this case, 2 strips of 12 LED lights each were arranged in such a way that the points of both strips coincided in pairs in height. The algorithm made it possible to monitor the increase in distance that occurred between each pair of lights at the same height. The paper shows the mathematical basis of this technological solution. A test has been carried out by installing this system in a concrete cube 150 mm side and subjected to a wedge-splitting test. The results show that it is possible to monitor the crack propagation (position of the crack front) during the test and to know the crack width too. At present, the accuracy of this technique is only limited by the camera resolution and the computer processing capability.},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
Kuncheva, Ludmila I.; Garrido-Labrador, José Luis; Ramos-Pérez, Ismael; Hennessey, Samuel L.; Rodríguez, Juan J.
An experiment on animal re-identification from video Journal Article
In: Ecological Informatics, vol. 74, 2023, ISSN: 1574-9541.
Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Kuncheva2023,
title = {An experiment on animal re-identification from video},
author = {Ludmila I. Kuncheva and José Luis Garrido-Labrador and Ismael Ramos-Pérez and Samuel L. Hennessey and Juan J. Rodríguez},
doi = {10.1016/j.ecoinf.2023.101994},
issn = {1574-9541},
year = {2023},
date = {2023-05},
urldate = {2023-05},
journal = {Ecological Informatics},
volume = {74},
publisher = {Elsevier BV},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
Barbero-Aparicio, José A.; Olivares-Gil, Alicia; Díez-Pastor, José F.; García-Osorio, César
Deep learning and support vector machines for transcription start site identification Journal Article
In: PeerJ Computer Science, vol. 9, iss. e1340, 2023, ISSN: 2376-5992.
Abstract | Links | BibTeX | Tags: bioinformatics, Convolutional neural network, Deep learning, Long short-term memory, Machine learning, PID2020-119894GB-I00, Support vector machines, transcription start site
@article{barbero-aparicio2023,
title = {Deep learning and support vector machines for transcription start site identification},
author = {José A. Barbero-Aparicio and Alicia Olivares-Gil and José F. Díez-Pastor and César García-Osorio},
editor = {Carlos Fernandez-Lozano},
url = {https://doi.org/10.7717/peerj-cs.1340},
doi = {10.7717/peerj-cs.1340},
issn = {2376-5992},
year = {2023},
date = {2023-04-17},
urldate = {2023-04-17},
journal = {PeerJ Computer Science},
volume = {9},
pages = {e1340},
abstract = {Recognizing transcription start sites is key to gene identification. Several approaches have been employed in related problems such as detecting translation initiation sites or promoters, many of the most recent ones based on machine learning. Deep learning methods have been proven to be exceptionally effective for this task, but their use in transcription start site identification has not yet been explored in depth. Also, the very few existing works do not compare their methods to support vector machines (SVMs), the most established technique in this area of study, nor provide the curated dataset used in the study. The reduced amount of published papers in this specific problem could be explained by this lack of datasets. Given that both support vector machines and deep neural networks have been applied in related problems with remarkable results, we compared their performance in transcription start site predictions, concluding that SVMs are computationally much slower, and deep learning methods, specially long short-term memory neural networks (LSTMs), are best suited to work with sequences than SVMs. For such a purpose, we used the reference human genome GRCh38. Additionally, we studied two different aspects related to data processing: the proper way to generate training examples and the imbalanced nature of the data. Furthermore, the generalization performance of the models studied was also tested using the mouse genome, where the LSTM neural network stood out from the rest of the algorithms. To sum up, this article provides an analysis of the best architecture choices in transcription start site identification, as well as a method to generate transcription start site datasets including negative instances on any species available in Ensembl. We found that deep learning methods are better suited than SVMs to solve this problem, being more efficient and better adapted to long sequences and large amounts of data. 
We also create a transcription start site (TSS) dataset large enough to be used in deep learning experiments},
keywords = {bioinformatics, Convolutional neural network, Deep learning, Long short-term memory, Machine learning, PID2020-119894GB-I00, Support vector machines, transcription start site},
pubstate = {published},
tppubtype = {article}
}
Setó-Rey, Daniel; Santos-Martín, José Ignacio; López-Nozal, Carlos
Vulnerability of Package Dependency Networks Journal Article
In: IEEE Trans. Netw. Sci. Eng., pp. 1–13, 2023, ISSN: 2327-4697.
Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Setó-Rey2023,
title = {Vulnerability of Package Dependency Networks},
author = {Daniel Setó-Rey and José Ignacio Santos-Martín and Carlos López-Nozal},
doi = {10.1109/tnse.2023.3260880},
issn = {2327-4697},
year = {2023},
date = {2023},
urldate = {2023},
journal = {IEEE Transactions on Network Science and Engineering},
pages = {1--13},
publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
2022
Pimenov, Danil Yurievich; Bustillo, Andrés; Wojciechowski, Szymon; Sharma, Vishal Santosh; Gupta, Munish Kumar; Kuntoğlu, Mustafa
Artificial intelligence systems for tool condition monitoring in machining: analysis and critical review Journal Article
In: Journal of Intelligent Manufacturing, vol. 2022, 2022, ISSN: 0956-5515.
Abstract | Links | BibTeX | Tags: Artificial intelligence, Machining, PID2020-119894GB-I00, Sensor, tool condition monitoring, Tool life, Wear
@comment{NOTE(review): volume = 2022 repeats the year and looks like a placeholder from the online-first record; it is left unchanged here. Confirm the real volume against the publisher record.}
@article{Pimenov2022,
title = {Artificial intelligence systems for tool condition monitoring in machining: analysis and critical review},
author = {Danil Yurievich Pimenov and Andrés Bustillo and Szymon Wojciechowski and Vishal Santosh Sharma and Munish Kumar Gupta and Mustafa Kuntoğlu},
url = {https://link.springer.com/article/10.1007/s10845-022-01923-2},
doi = {10.1007/s10845-022-01923-2},
issn = {0956-5515},
year = {2022},
date = {2022-03-12},
urldate = {2022-03-12},
journal = {Journal of Intelligent Manufacturing},
volume = {2022},
abstract = {The wear of cutting tools, cutting force determination, surface roughness variations and other machining responses are of keen interest to latest researchers. The variations of these machining responses results in change in dimensional accuracy and productivity upto great extent. In addition, an excessive increase in wear leads to catastrophic consequences, exceeding the tool breakage. Therefore, this article discusses the online trend of modern approaches in tool condition monitoring while different machining operations. For this purpose, the effective use of new sensors and artificial intelligence (AI) is considered and followed during this holistic review work. The sensor systems used for monitoring tool wear are dynamometers, accelerometers, acoustic emission sensors, current and power sensors, image sensors, other sensors. These systems allow to solve the problem of automation and modeling of technological parameters of the main types of cutting, such as turning, milling, drilling and grinding. The modern artificial intelligence methods are considered, such as: Neural networks, Image recognition, Fuzzy logic, Adaptive neuro-fuzzy inference systems, Bayesian Networks, Support vector machine, Ensembles, Decision and regression trees, k-nearest neighbors, Artificial Neural Network, Markov model, Singular Spectrum Analysis, Genetic algorithms. Discussions also includes the main advantages, disadvantages and prospects of using various AI methods for tool wear monitoring. Moreover, the problems and future directions of the main processing methods using AI models are also highlighted.},
keywords = {Artificial intelligence, Machining, PID2020-119894GB-I00, Sensor, tool condition monitoring, Tool life, Wear},
pubstate = {published},
tppubtype = {article}
}
Ramos-Pérez, Ismael; Arnaiz-González, Álvar; Rodríguez, Juan José; García-Osorio, César
When is resampling beneficial for feature selection with imbalanced wide data? Journal Article
In: Expert Systems with Applications, vol. 188, pp. 116015, 2022, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: Feature selection, High dimensional data, Machine learning, PID2020-119894GB-I00, SELECTED, Unbalanced, Very low sample size, Wide data
@article{Ramos-Pérez2022,
title = {When is resampling beneficial for feature selection with imbalanced wide data?},
author = {Ismael Ramos-Pérez and Álvar Arnaiz-González and Juan José Rodríguez and César García-Osorio},
url = {https://www.sciencedirect.com/science/article/pii/S0957417421013622},
doi = {10.1016/j.eswa.2021.116015},
issn = {0957-4174},
year = {2022},
date = {2022-02-01},
urldate = {2022-02-01},
journal = {Expert Systems with Applications},
volume = {188},
pages = {116015},
abstract = {This paper studies the effects that combinations of balancing and feature selection techniques have on wide data (many more attributes than instances) when different classifiers are used. For this, an extensive study is done using 14 datasets, 3 balancing strategies, and 7 feature selection algorithms. The evaluation is carried out using 5 classification algorithms, analyzing the results for different percentages of selected features, and establishing the statistical significance using Bayesian tests.
Some general conclusions of the study are that it is better to use RUS before the feature selection, while ROS and SMOTE offer better results when applied afterwards. Additionally, specific results are also obtained depending on the classifier used, for example, for Gaussian SVM the best performance is obtained when the feature selection is done with SVM-RFE before balancing the data with RUS.},
keywords = {Feature selection, High dimensional data, Machine learning, PID2020-119894GB-I00, SELECTED, Unbalanced, Very low sample size, Wide data},
pubstate = {published},
tppubtype = {article}
}
Some general conclusions of the study are that it is better to use RUS before the feature selection, while ROS and SMOTE offer better results when applied afterwards. Additionally, specific results are also obtained depending on the classifier used, for example, for Gaussian SVM the best performance is obtained when the feature selection is done with SVM-RFE before balancing the data with RUS.
Olivares-Gil, Alicia; Arnaiz-Rodríguez, Adrián; Ramírez-Sanz, José Miguel; Garrido-Labrador, José Luis; Ahedo, Virginia; García-Osorio, César; Santos, José Ignacio; Galán, José Manuel
Mapping the scientific structure of organization and management of enterprises using complex networks Journal Article
In: Int. J. Prod. Manag. Eng., vol. 10, no. 1, pp. 65–76, 2022, ISSN: 2340-4876.
Abstract | Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Olivares-Gil2022,
title = {Mapping the scientific structure of organization and management of enterprises using complex networks},
author = {Alicia Olivares-Gil and Adrián Arnaiz-Rodríguez and José Miguel Ramírez-Sanz and José Luis Garrido-Labrador and Virginia Ahedo and César García-Osorio and José Ignacio Santos and José Manuel Galán},
doi = {10.4995/ijpme.2022.16666},
issn = {2340-4876},
year = {2022},
date = {2022-01-31},
urldate = {2022-01-31},
journal = {International Journal of Production Management and Engineering},
volume = {10},
number = {1},
pages = {65--76},
publisher = {Universitat Politecnica de Valencia},
abstract = {Understanding the scientific and social structure of a discipline is a fundamental aspect for scientific evaluation processes, identifying trends and niches, and balancing the trade-off between exploitation and exploration in research. In the present contribution, the production of doctoral theses is used as a proxy to analyze the scientific structure of the knowledge area of business organization in Spain. To that end, a complex networks approach is selected, and two different networks are built: (i) the social network of co-participation in thesis examining committees and thesis supervision, and (ii) a bipartite network of theses and thesis descriptors. The former has a modular structure that is partially explained by thematic specialization in different subdisciplines. The latter serves to assess the interdisciplinary structure of the discipline, as it enables the characterization of affinity levels between fields, research poles and thematic clusters. Our results have implications for the scientific evaluation and formal definition of related fields.},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}
Cruz, David Checa; Urbikain, Gorka; Beranoagirre, Aitor; Bustillo, Andrés; Lacalle, Luis Norberto López
Using Machine-Learning techniques and Virtual Reality to design cutting tools for energy optimization in milling operations Journal Article
In: International Journal of Computer Integrated Manufacturing, vol. 35, no. 1, pp. 1–21, 2022, ISSN: 0951-192X.
Abstract | Links | BibTeX | Tags: energy optimization, Ensembles, Multilayer perceptron, PID2020-119894GB-I00, serrated cutters, Virtual Reality
@article{Cruz2022b,
title = {Using Machine-Learning techniques and Virtual Reality to design cutting tools for energy optimization in milling operations},
author = {David Checa Cruz and Gorka Urbikain and Aitor Beranoagirre and Andrés Bustillo and Luis Norberto López Lacalle},
url = {https://www.tandfonline.com/doi/full/10.1080/0951192X.2022.2027020},
doi = {10.1080/0951192X.2022.2027020},
issn = {0951-192X},
year = {2022},
date = {2022-01-19},
urldate = {2022-01-19},
journal = {International Journal of Computer Integrated Manufacturing},
volume = {35},
number = {1},
pages = {1--21},
abstract = {The selection of a proper cutting tool in machining operations is a critical issue. Tool geometric parameters are essential for milling performance. However, the process engineer has very limited experience of the best parameter combination, due to the high cost of cutting tool tests. The same holds true for bachelor studies on machining processes. This study proposes a new strategy that combines experimental tests, machine-learning modelling and Virtual Reality visualization to overcome these limitations. First, tools with different geometric parameters are tested. Second, the experimental data are modeled with different machine-learning techniques (regression trees, multilayer perceptrons, bagging and random forest ensembles). An in-depth analysis of the influence of each input on model accuracy is performed to reduce experimental costs. The results show that the best model with no cutting-force inputs performed worse than the best model with all the inputs. Third, the most accurate model is used to build 3D graphs of special interest to engineering students as well as process engineers, for the optimization of power consumption under different cutting conditions. Finally, a Virtual Reality environment is presented to train engineering students in the study of the best tool design and cutting parameter optimization.},
keywords = {energy optimization, Ensembles, Multilayer perceptron, PID2020-119894GB-I00, serrated cutters, Virtual Reality},
pubstate = {published},
tppubtype = {article}
}
2021
Díez-Pastor, José Francisco; Latorre-Carmona, Pedro; Garrido-Labrador, José Luis; Ramírez-Sanz, José Miguel; Rodríguez, Juan J.
Experimental Assessment of Feature Extraction Techniques Applied to the Identification of Properties of Common Objects, Using a Radar System Journal Article
In: Applied Sciences, vol. 11, no. 15, 2021, ISSN: 2076-3417.
Abstract | Links | BibTeX | Tags: PID2020-119894GB-I00
@article{Díez-Pastor2021b,
title = {Experimental Assessment of Feature Extraction Techniques Applied to the Identification of Properties of Common Objects, Using a Radar System},
author = {José Francisco Díez-Pastor and Pedro Latorre-Carmona and José Luis Garrido-Labrador and José Miguel Ramírez-Sanz and Juan J. Rodríguez},
doi = {10.3390/app11156745},
issn = {2076-3417},
year = {2021},
date = {2021-08},
urldate = {2021-08},
journal = {Applied Sciences},
volume = {11},
number = {15},
publisher = {MDPI AG},
abstract = {Radar technology has evolved considerably in the last few decades. There are many areas where radar systems are applied, including air traffic control in airports, ocean surveillance, and research systems, to cite a few. Other types of sensors have recently appeared, which allow tracking sub-millimeter motion with high speed and accuracy rates. These millimeter-wave radars are giving rise to myriad new applications, from the recognition of the material close objects are made, to the recognition of hand gestures. They have also been recently used to identify how a person interacts with digital devices through the physical environment (Tangible User Interfaces, TUIs). In this case, the radar is used to detect the orientation, movement, or distance from the objects to the user’s hands or the digital device. This paper presents a thoughtful comparative analysis of different feature extraction techniques and classification strategies applied on a series of datasets that cover problems such as the identification of materials, element counting, or determining the orientation and distance of objects to the sensor. The results outperform previous works using these datasets, especially when the accuracy was lowest, showing the benefits feature extraction techniques have on classification performance.},
keywords = {PID2020-119894GB-I00},
pubstate = {published},
tppubtype = {article}
}