-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLMM _code.R
More file actions
79 lines (62 loc) · 2.87 KB
/
LMM _code.R
File metadata and controls
79 lines (62 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
library(nlme)
library(dplyr)
library(ggplot2)
# load and process data
data <- read.table("Data2.txt",header=TRUE)
# separating data (6 measurements per subject for most subjects, m=481)
time <- c(0,2,4,8,16,24)
calwk <- data$calwk
logrna <- as.numeric(data$logrna)
nnrti <- factor(data$nnrti)
txday <- data$txday
cens <- factor(data$cens)
trtarm <- factor(data$trtarm)
patid <- data$patid
hiv <- data.frame(patid, nnrti, calwk, txday, logrna, cens, trtarm)
set.seed(158)
patients <- seq(1:481)
test_ids <- sample(patients, size = 96, replace = FALSE) # choosing the patient ID's for test data
test_ids <- sort(test_ids)
train_ids <- setdiff(patients, test_ids)
hiv_train <- hiv %>% filter(patid %in% train_ids)
hiv_test <- hiv %>% filter(patid %in% test_ids)
# Linear Mixed-Effect Models: choosing the best LMM to use (random slope models)
# independent within subject correlation structure
lme1 <- lme(logrna ~ nnrti + calwk + txday + cens + trtarm,
data = hiv_train, random = ~ calwk | patid, method = 'ML')
summary(lme1)
# compound symmetry structure for within subject correlation
lme2 <- update(lme1, correlation = corCompSymm(form = ~ 1 | patid))
summary(lme2)
# fewer fixed effects, random slope with independent within subject correlation
lme3 <- lme(logrna ~ trtarm * calwk, data = hiv_train,
random = ~calwk | patid, method = 'ML')
summary(lme3)
lme4 <- update(lme3, correlation = corCompSymm(form = ~ 1 | patid))
summary(lme4)
anova(lme1, lme2, lme3, lme4) # we choose lme1 based on AIC/BIC
# improve parameter estimates by using REML instead of ML
lme5 <- update(lme1, method = 'REML')
summary(lme5) # shows the fitted parameters for the chosen LMM
# predictions
lme5_pred <- predict(lme5, newdata = hiv_test, level = 0) # level asserts that we only use fixed effects in prediction, since expectation of random components is 0
# plot predictions vs actual responses
D5 <- data.frame(real_values = hiv_test$logrna, Predictions = lme5_pred)
ggplot(D5, aes(x = Predictions, y = real_values)) +
geom_point(alpha = 0.6, aes(color = 'Data points')) +
geom_smooth(method = "lm", se = FALSE, aes(color = 'Regression line')) + # best fit line
geom_line(linewidth = 1, data = data.frame(x = range(D5$Predictions), y = range(D5$Predictions)), aes(x=x, y=y, color = 'Identity line')) +
scale_color_manual(name = 'Legend', values = c('Data points'= 'deeppink3', 'Regression line' = 'black', 'Identity line' = 'green')) +
labs(
x = "Predicted viral load", y = "Actual viral load",
title =
"Predicted vs Actual viral loads for the testing data (LMM)") +
theme_minimal()
# mse
mse_lmm <- mean((lme5_pred - hiv_test$logrna)^2)
mse_lmm
# mae
mae_lmm <- mean(abs(lme5_pred - hiv_test$logrna))
mae_lmm
# correlation between predicted and actual responses
cor(hiv_test$logrna, lme5_pred)