library(tidyverse)
library(mvtnorm)
library(monomvn)
library(coda)
Y = read_csv("https://sta360-fa23.github.io/data/Pima.csv") %>%
as.matrix()
colnames(Y) = NULLInference under MVN with missing data
Example: precision medicine
This example is from Hoff ch. 7.
Load libraries and data.
This data set contains
glublood plasma glucose concentrationbpdiastolic blood pressureskinskin fold thicknessbmibody mass index
for 200 women of Pima Indian heritage living near Phoenix, Arizona (Smith et al, 1988). Some observations are missing.
Inference using Gibbs sampling
Setup prior parameters and starting values.
## prior parameters
n = nrow(Y); p = ncol(Y)
# prior on theta
mu0 = c(120, 64, 26, 26); sd0 = (mu0 / 2)
L0 = matrix(.1, p, p)
diag(L0) = 1
L0 = L0 * outer(sd0, sd0) # \Lambda_0
# prior on Sigma
nu0 = p + 2;
S0 = L0
###
### starting values
Sigma = S0
Y.full = Y
O = 1 * (!is.na(Y)) # indices for observe values of Y
for(j in 1:p) {
Y.full[is.na(Y.full[,j]),j]<-mean(Y.full[,j],na.rm=TRUE)
} Exercise: what does the code above set the starting values of \(\theta, \Sigma\) and \(\textbf{Y}_{\text{mis}}\) to?
The Gibbs sampler.
### Gibbs sampler
THETA <- SIGMA <- Y.MISS <- NULL
set.seed(360)
for(s in 1:1000) {
###update theta
ybar <- apply(Y.full, 2 , mean)
Ln <- solve(solve(L0) + n * solve(Sigma))
mun <- Ln %*% (solve(L0) %*% mu0 + n * solve(Sigma) %*% ybar)
theta <- rmvnorm(1, mun, Ln)
###
###update Sigma
Sn <- S0 + (t(Y.full) - c(theta)) %*% t(t(Y.full) - c(theta))
Sigma <- rwish(nu0 + n, solve(Sn))
###
###update missing data
for(i in 1:n) {
b <- (O[i, ] == 0)
a <- (O[i, ] == 1)
if( sum(b) != 0) {
iSa <- solve(Sigma[a, a])
beta.j <- Sigma[b, a] %*% iSa
s2.j <- Sigma[b, b] - Sigma[b, a] %*% iSa %*% Sigma[a, b]
theta.j <- theta[b] + beta.j %*% (as.matrix(Y.full[i, a]) - theta[a])
Y.full[i, b] <- rmvnorm(1, theta.j, s2.j)
}
}
### save results
THETA<-rbind(THETA,theta) ; SIGMA<-rbind(SIGMA,c(Sigma))
Y.MISS<-rbind(Y.MISS, Y.full[O==0] )
###
if(s %% 250 == 0 | s == 1) {
cat(s,theta,"\n")
}
}1 129.6031 71.66437 28.97947 31.385
250 123.6105 70.56862 29.10075 30.97481
500 123.6067 70.36211 29.20791 31.03185
750 123.6179 70.78825 29.29216 30.90945
1000 123.6017 70.15369 29.22685 31.08064
#### Posterior mean
apply(THETA,2,mean)[1] 123.60189 70.57085 29.16132 31.79352
Effective sample size of THETA
# effective sample size of THETA
apply(THETA, 2, effectiveSize)[1] 879.4569 3680.4806 4411.2164 7721.7664