Citation

BibTeX format

@article{Manley:2026:10.1186/s12879-026-13247-0,
  author  = {Manley, H. and Leber, W. and Smith, K. and Farooq, H. Z. and Pareek, M. and Baggaley, R. F. and Anderson, J. and Loman, L. and Griffiths, C. and Robson, J. and Panovska-Griffiths, J.},
  title   = {Application of machine-learning algorithms to identify the key determinants of risk for {HIV}, hepatitis {C} and hepatitis {B} in primary care settings},
  journal = {{BMC} Infectious Diseases},
  year    = {2026},
  doi     = {10.1186/s12879-026-13247-0},
  url     = {https://doi.org/10.1186/s12879-026-13247-0},
}

RIS format (EndNote, RefMan)

TY  - JOUR
AB  - BACKGROUND: Testing for Blood-Borne-Viruses (BBVs) such as the human immunodeficiency virus (HIV), hepatitis C virus (HCV) and hepatitis B virus (HBV) is generally focused on specialist settings. However, people with undiagnosed infections are also present within the general population. We explore whether using machine-learning algorithms (MLAs) can identify people at heightened risk of HIV, HBV, HCV, or a composite 'any BBV' (defined as positivity for one or more of the three infections) in primary care settings. METHODS: From de-identified electronic health records data from 165 general practices in North East London we extracted risk factors for HIV, HCV and HBV and used them to train (75% data) and test (25% data) three MLAs: Logistic Regression (LR), AdaBoost with random under sampling (RUSBoost) and Balanced Random Forest classifier (BRFC). The ROC curves, ROC AUC, sensitivity and specificity values quantified the models' performance. Across the models the key features for each outcome were identified. RESULTS: A total of 1,987,954 patients were included in the study with no inclusion or exclusion criteria, from whom 75 predictive features were selected for HIV, 24 for HCV, 37 for HBV and 88 for any BBV outcome. Different models were optimal for individual BBVs positivity classification, depending on the accuracy metric. As a single infection, HCV was predicted most accurately across models and accuracy metrics. When targeting any BBV outcome, LR was the model with highest AUC value, BRFC was the most sensitive model and RUSBoost was the most specific model. The key identified features were similar across models with age the strongest predictor for both individual positivity and the composite outcome. A number of features were important for two of the BBV positive groups: Black African ethnicity (HIV and HBV), liver disease (HBV and HCV) and opiate and cocaine use (HBV and HCV). A number of individual features were important for individual BBVs positivity. CON
AU  - Manley,H
AU  - Leber,W
AU  - Smith,K
AU  - Farooq,HZ
AU  - Pareek,M
AU  - Baggaley,RF
AU  - Anderson,J
AU  - Loman,L
AU  - Griffiths,C
AU  - Robson,J
AU  - Panovska-Griffiths,J
DO  - 10.1186/s12879-026-13247-0
PY  - 2026///
TI  - Application of machine-learning algorithms to identify the key determinants of risk for HIV, hepatitis C and hepatitis B in primary care settings.
T2  - BMC Infect Dis
UR  - https://doi.org/10.1186/s12879-026-13247-0
UR  - https://www.ncbi.nlm.nih.gov/pubmed/42115877
ER  - 