% Journal article: Exploration of Digital Health Technologies, volume 2 (2024).
% Only the acronym COVID-19 is brace-protected in the title, so bibliography
% styles stay free to apply their own casing to the remaining words.
% The percent sign in the abstract is escaped so the field is safe to typeset.
@article{10.37349/edht.2024.00022,
  author   = {Yan, Yuyang and Aljbawi, Wafaa and Simons, Sami O. and Urovi, Visara},
  title    = {Developing a multi-variate prediction model for {COVID-19} from crowd-sourced respiratory voice data},
  journal  = {Exploration of Digital Health Technologies},
  volume   = {2},
  number   = {4},
  pages    = {202--217},
  year     = {2024},
  doi      = {10.37349/edht.2024.00022},
  url      = {https://www.explorationpub.com/Journals/edht/Article/101122},
  abstract = {Aim: COVID-19 has affected more than 223 countries worldwide and in the post-COVID era, there is a pressing need for non-invasive, low-cost, and highly scalable solutions to detect COVID-19. This study focuses on the analysis of voice features and machine learning models in the automatic detection of COVID-19. Methods: We develop a deep learning model to identify COVID-19 from voice recording data. The novelty of this work is in the development of deep learning models for COVID-19 identification from only voice recordings. We use the Cambridge COVID-19 Sound database which contains 893 speech samples, crowd-sourced from 4,352 participants via a COVID-19 Sounds app. Voice features including Mel-spectrograms and Mel-frequency cepstral coefficients (MFCC) and convolutional neural network (CNN) Encoder features are extracted. Based on the voice data, we develop deep learning classification models to detect COVID-19 cases. These models include long short-term memory (LSTM), CNN and Hidden-Unit BERT (HuBERT). Results: We compare their predictive power to baseline machine learning models. HuBERT achieves the highest accuracy of 86\% and the highest AUC of 0.93. Conclusions: The results achieved with the proposed models suggest promising results in COVID-19 diagnosis from voice recordings when compared to the results obtained from the state-of-the-art.},
}