2025
Maestro-Prieto, José Alberto; Romero, Pablo E.; Ramírez Sanz, José Miguel
Semi-supervised techniques to address the scarcity of experimental data: a case study of single point incremental forming Journal Article
In: Journal of Intelligent Manufacturing, 2025, ISSN: 0956-5515.
Abstract | Links | BibTeX | Tags: multiple data source, Semi-supervised learning, single point incremental forming, SPIF, surface roughness
@article{maestro-prieto2025,
title = {Semi-supervised techniques to address the scarcity of experimental data: a case study of single point incremental forming},
author = {Maestro-Prieto, José Alberto and Romero, Pablo E. and Ramírez Sanz, José Miguel},
url = {https://doi.org/10.1007/s10845-025-02704-3},
doi = {10.1007/s10845-025-02704-3},
issn = {0956-5515},
year = {2025},
date = {2025-10-31},
urldate = {2025-10-31},
journal = {Journal of Intelligent Manufacturing},
abstract = {A lack of experimental data can be especially critical in new manufacturing processes. Although experimental datasets for industrial processes are reported in various research works, their lack of homogeneity complicates any fitting with conventional numerical models. Artificial Intelligence (AI) models can be an optimal alternative to extract useful information from those unconnected datasets, while generating models that can help explain the hidden patterns within datasets and interpret the predictions of the model for final users. Moreover, an AI algorithm that could be trained with limited labeled datasets would be in high demand, as it could effectively lower implementation costs. Semi-Supervised Learning (SSL) techniques might therefore be a promising solution to respond to industrial demand for the analysis of manufacturing processes. In this research, the use of SSL techniques is proposed in a case study of surface quality prediction in single point incremental forming, a promising new manufacturing technique. Datasets were extracted from the existing bibliography to generate a 234-instance dataset with 4 different industrial specifications of roughness. The best results were obtained using a semi-supervised Co-Training algorithm. Semi-supervised methods systematically improved the results obtained with the reference supervised methods, although statistical significance has not been mainly achieved due to the limited dataset size. The results obtained with the unbalanced dataset were very promising for its industrial implementation with an extended training dataset optimized for the range of process conditions of each end-user.},
keywords = {multiple data source, Semi-supervised learning, single point incremental forming, SPIF, surface roughness},
pubstate = {published},
tppubtype = {article}
}
Maestro-Prieto, José Alberto; Gil-Del-Val, Alain; Bustillo, Andrés
Semi-supervised tapping wear detection in nodular cast-iron workpieces under real industrial condition Journal Article
In: International Journal of Advanced Manufacturing Technology, 2025, ISSN: 0268-3768.
Abstract | Links | BibTeX | Tags: fault detection, Semi-supervised learning, tapping, Wear
@article{maestro-prieto2025b,
title = {Semi-supervised tapping wear detection in nodular cast-iron workpieces under real industrial condition},
author = {José Alberto Maestro-Prieto and Alain Gil-Del-Val and Andrés Bustillo},
url = {https://link.springer.com/article/10.1007/s00170-025-16491-x},
doi = {10.1007/s00170-025-16491-x},
issn = {0268-3768},
year = {2025},
date = {2025-09-19},
urldate = {2025-09-19},
journal = {International Journal of Advanced Manufacturing Technology},
abstract = {The tapping of metal components is a manufacturing task with great potential for automation, because the conditions affecting the industrial components are of limited variability. However, automation encounters two main problems: both the human- and the time-related costs associated with the manual classification of threads are excessive, and thread quality can vary greatly, due to tapping tool wear. In this study, the use of semi-supervised algorithms is proposed to improve the performance of machine learning–based models trained on real industrial datasets. The strategy was validated on a dataset of more than 7000 threads produced with 36 different tapping tools under the same working conditions involving nodular cast iron workpieces. Several algorithms were trained using datasets with different features and data processing. The best results were obtained with datasets using linear regression in which sinusoidal fluctuations in the raw signals were replaced by linear regressions and the slope of an 11-element rolling window was applied to extend the raw dataset. Algorithms were trained with different percentages of labeled datasets. The co-training-based algorithms almost systematically obtained the best results, yielding better results than the reference algorithms using a 100% labeled dataset. Besides, the proposed solution also achieved higher performance with 50% of labeled instances in the training dataset, drastically reducing the costs of manual labeling for that sort of industrial dataset.},
keywords = {fault detection, Semi-supervised learning, tapping, Wear},
pubstate = {published},
tppubtype = {article}
}
2024
Maestro-Prieto, Jose Alberto; Ramírez-Sanz, José Miguel; Bustillo, Andrés; Rodriguez-Díez, Juan José
Semi-supervised diagnosis of wind-turbine gearbox misalignment and imbalance faults Journal Article
In: Applied Intelligence, 2024, ISSN: 1573-7497.
Abstract | Links | BibTeX | Tags: Bearing failures, Fault detection and diagnosis, PID2020-119894GB-I00, Powertrain failures, Semi-supervised learning, SSL, Wind turbine
@article{Maestro-Prieto2024,
title = {Semi-supervised diagnosis of wind-turbine gearbox misalignment and imbalance faults},
author = {Jose Alberto Maestro-Prieto and José Miguel Ramírez-Sanz and Andrés Bustillo and Juan José Rodriguez-Díez},
url = {https://doi.org/10.1007/s10489-024-05373-6},
doi = {10.1007/s10489-024-05373-6},
issn = {1573-7497},
year = {2024},
date = {2024-03-28},
urldate = {2024-03-28},
journal = {Applied Intelligence},
abstract = {Both wear-induced bearing failure and misalignment of the powertrain between the rotor and the electrical generator are common failure modes in wind-turbine motors. In this study, Semi-Supervised Learning (SSL) is applied to a fault detection and diagnosis solution. Firstly, a dataset is generated containing both normal operating patterns and seven different failure classes of the two aforementioned failure modes that vary in intensity. Several datasets are then generated, maintaining different numbers of labeled instances and unlabeling the others, in order to evaluate the number of labeled instances needed for the desired accuracy level. Subsequently, different types of SSL algorithms and combinations of algorithms are trained and then evaluated with the test data. The results showed that an SSL approach could improve the accuracy of trained classifiers when a small number of labeled instances were used together with many unlabeled instances to train a Co-Training algorithm or combinations of such algorithms. When a few labeled instances (fewer than 10% or 327 instances, in this case) were used together with unlabeled instances, the SSL algorithms outperformed the result obtained with the Supervised Learning (SL) techniques used as a benchmark. When the number of labeled instances was sufficient, the SL algorithm (using only labeled instances) performed better than the SSL algorithms (accuracy levels of 87.04% vs. 86.45%, when labeling 10% of instances). A competitive accuracy of 97.73% was achieved with the SL algorithm processing a subset of 40% of the labeled instances.},
keywords = {Bearing failures, Fault detection and diagnosis, PID2020-119894GB-I00, Powertrain failures, Semi-supervised learning, SSL, Wind turbine},
pubstate = {published},
tppubtype = {article}
}
Martin-Melero, Íñigo; Serrano-Mamolar, Ana; Rodríguez-Diez, Juan J.
Evaluation of Semi-Supervised Machine Learning applied to Affective State Detection Proceedings Article
In: IEEE, 2024.
Links | BibTeX | Tags: p_humanaid, PID2020-119894GB-I00, Semi-supervised learning, SSL
@inproceedings{Martin-Melero2024,
title = {Evaluation of Semi-Supervised Machine Learning applied to Affective State Detection},
author = {Íñigo Martin-Melero and Ana Serrano-Mamolar and Juan J. Rodríguez-Diez},
doi = {10.1109/percomworkshops59983.2024.10502901},
year = {2024},
date = {2024-03-11},
urldate = {2024-03-11},
booktitle = {2024 IEEE International Conference on Pervasive Computing and Communications Workshops and other Affiliated Events (PerCom Workshops)},
publisher = {IEEE},
keywords = {p_humanaid, PID2020-119894GB-I00, Semi-supervised learning, SSL},
pubstate = {published},
tppubtype = {inproceedings}
}
Garrido-Labrador, José Luis; Serrano-Mamolar, Ana; Maudes-Raedo, Jesús; Rodríguez, Juan José; García-Osorio, César
Ensemble methods and semi-supervised learning for information fusion: A review and future research directions Journal Article
In: Information Fusion, vol. 107, 2024.
Abstract | Links | BibTeX | Tags: Bibliographic review, Ensemble learning, Experimental protocol, Information fusion, Label scarcity, PID2020-119894GB-I00, Research trends, Semi-supervised ensemble classification, Semi-supervised learning, SSL
@article{garrido2024ensemble,
title = {Ensemble methods and semi-supervised learning for information fusion: A review and future research directions},
author = {José Luis Garrido-Labrador and Ana Serrano-Mamolar and Jesús Maudes-Raedo and Juan José Rodríguez and César García-Osorio},
url = {https://doi.org/10.1016/j.inffus.2024.102310},
doi = {10.1016/j.inffus.2024.102310},
issn = {1566-2535},
year = {2024},
date = {2024-02-02},
urldate = {2024-02-02},
journal = {Information Fusion},
volume = {107},
pages = {102310},
abstract = {Advances over the past decade at the intersection of information fusion methods and Semi-Supervised Learning (SSL) are investigated in this paper that grapple with challenges related to limited labelled data. To do so, a bibliographic review of papers published since 2013 is presented, in which ensemble methods are combined with new machine learning algorithms. A total of 128 new proposals using SSL algorithms for ensemble construction are identified and classified. All the methods are categorised by approach, ensemble type, and base classifier. Experimental protocols, pre-processing, dataset usage, unlabelled ratios, and statistical tests are also assessed, underlining the major trends, and some shortcomings of particular studies. It is evident from this literature review that foundational algorithms such as self-training and co-training are influencing current developments, and that innovative ensemble …},
keywords = {Bibliographic review, Ensemble learning, Experimental protocol, Information fusion, Label scarcity, PID2020-119894GB-I00, Research trends, Semi-supervised ensemble classification, Semi-supervised learning, SSL},
pubstate = {published},
tppubtype = {article}
}
Barbero-Aparicio, José A.; Olivares-Gil, Alicia; Rodríguez, Juan J.; García-Osorio, César; Díez-Pastor, José F.
Addressing data scarcity in protein fitness landscape analysis: A study on semi-supervised and deep transfer learning techniques Journal Article
In: Information Fusion, vol. 102, pp. 102035, 2024, ISSN: 1566-2535.
Abstract | Links | BibTeX | Tags: bioinformatics, Machine learning, PID2020-119894GB-I00, Protein fitness prediction, Semi-supervised learning, Small datasets, SSL, Transfer learning
@article{barbero-aparicio2023b,
title = {Addressing data scarcity in protein fitness landscape analysis: A study on semi-supervised and deep transfer learning techniques},
author = {José A. Barbero-Aparicio and Alicia Olivares-Gil and Juan J. Rodríguez and César García-Osorio and José F. Díez-Pastor},
url = {https://www.sciencedirect.com/science/article/pii/S1566253523003512},
doi = {10.1016/j.inffus.2023.102035},
issn = {1566-2535},
year = {2024},
date = {2024-01-01},
urldate = {2024-01-01},
journal = {Information Fusion},
volume = {102},
pages = {102035},
abstract = {This paper presents a comprehensive analysis of deep transfer learning methods, supervised methods, and semi-supervised methods in the context of protein fitness prediction, with a focus on small datasets. The analysis includes the exploration of the combination of different data sources to enhance the performance of the models. While deep learning and deep transfer learning methods have shown remarkable performance in situations with abundant data, this study aims to address the more realistic scenario faced by wet lab researchers, where labeled data is often limited. The novelty of this work lies in its examination of deep transfer learning in the context of small datasets and its consideration of semi-supervised methods and multi-view strategies. While previous research has extensively explored deep transfer learning in large dataset scenarios, little attention has been given to its efficacy in small dataset settings or its comparison with semi-supervised approaches. Our findings suggest that deep transfer learning, exemplified by ProteinBERT, shows promising performance in this context compared to the rest of the methods across various evaluation metrics, not only in small dataset contexts but also in large dataset scenarios. This highlights the robustness and versatility of deep transfer learning in protein fitness prediction tasks, even with limited labeled data. The results of this study shed light on the potential of deep transfer learning as a state-of-the-art approach in the field of protein fitness prediction. By leveraging pre-trained models and fine-tuning them on small datasets, researchers can achieve competitive performance surpassing traditional supervised and semi-supervised methods. These findings provide valuable insights for wet lab researchers who face the challenge of limited labeled data, enabling them to make informed decisions when selecting the most effective methodology for their specific protein fitness prediction tasks. Additionally, the study investigated the combination of two different sources of information (encodings) through our enhanced semi-supervised methods, yielding noteworthy results improving their base model and providing valuable insights for further research. The presented analysis contributes to a better understanding of the capabilities and limitations of different learning approaches in small dataset scenarios, ultimately aiding in the development of improved protein fitness prediction methods.},
keywords = {bioinformatics, Machine learning, PID2020-119894GB-I00, Protein fitness prediction, Semi-supervised learning, Small datasets, SSL, Transfer learning},
pubstate = {published},
tppubtype = {article}
}
While deep learning and deep transfer learning methods have shown remarkable performance in situations with abundant data, this study aims to address the more realistic scenario faced by wet lab researchers, where labeled data is often limited. The novelty of this work lies in its examination of deep transfer learning in the context of small datasets and its consideration of semi-supervised methods and multi-view strategies. While previous research has extensively explored deep transfer learning in large dataset scenarios, little attention has been given to its efficacy in small dataset settings or its comparison with semi-supervised approaches. Our findings suggest that deep transfer learning, exemplified by ProteinBERT, shows promising performance in this context compared to the rest of the methods across various evaluation metrics, not only in small dataset contexts but also in large dataset scenarios. This highlights the robustness and versatility of deep transfer learning in protein fitness prediction tasks, even with limited labeled data. The results of this study shed light on the potential of deep transfer learning as a state-of-the-art approach in the field of protein fitness prediction. By leveraging pre-trained models and fine-tuning them on small datasets, researchers can achieve competitive performance surpassing traditional supervised and semi-supervised methods. These findings provide valuable insights for wet lab researchers who face the challenge of limited labeled data, enabling them to make informed decisions when selecting the most effective methodology for their specific protein fitness prediction tasks. Additionally, the study investigated the combination of two different sources of information (encodings) through our enhanced semi-supervised methods, yielding noteworthy results improving their base model and providing valuable insights for further research.
The presented analysis contributes to a better understanding of the capabilities and limitations of different learning approaches in small dataset scenarios, ultimately aiding in the development of improved protein fitness prediction methods.

