library(tidyverse)
library(mvtnorm)
library(monomvn)
library(coda)
= read_csv("https://sta360-fa23.github.io/data/Pima.csv") %>%
Y as.matrix()
colnames(Y) = NULL
Inference under MVN with missing data
Example: precision medicine
This example is from Hoff ch. 7.
Load libraries and data.
This data set contains
glu
blood plasma glucose concentrationbp
diastolic blood pressureskin
skin fold thicknessbmi
body mass index
for 200 women of Pima Indian heritage living near Phoenix, Arizona (Smith et al, 1988). Some observations are missing.
Inference using Gibbs sampling
Setup prior parameters and starting values.
## prior parameters
= nrow(Y); p = ncol(Y)
n # prior on theta
= c(120, 64, 26, 26); sd0 = (mu0 / 2)
mu0 = matrix(.1, p, p)
L0 diag(L0) = 1
= L0 * outer(sd0, sd0) # \Lambda_0
L0 # prior on Sigma
= p + 2;
nu0 = L0
S0 ###
### starting values
= S0
Sigma = Y
Y.full = 1 * (!is.na(Y)) # indices for observe values of Y
O
for(j in 1:p) {
is.na(Y.full[,j]),j]<-mean(Y.full[,j],na.rm=TRUE)
Y.full[ }
Exercise: what does the code above set the starting values of \(\theta, \Sigma\) and \(\textbf{Y}_{\text{mis}}\) to?
The Gibbs sampler.
### Gibbs sampler
<- SIGMA <- Y.MISS <- NULL
THETA set.seed(360)
for(s in 1:1000) {
###update theta
<- apply(Y.full, 2 , mean)
ybar <- solve(solve(L0) + n * solve(Sigma))
Ln <- Ln %*% (solve(L0) %*% mu0 + n * solve(Sigma) %*% ybar)
mun <- rmvnorm(1, mun, Ln)
theta ###
###update Sigma
<- S0 + (t(Y.full) - c(theta)) %*% t(t(Y.full) - c(theta))
Sn <- rwish(nu0 + n, solve(Sn))
Sigma ###
###update missing data
for(i in 1:n) {
<- (O[i, ] == 0)
b <- (O[i, ] == 1)
a if( sum(b) != 0) {
<- solve(Sigma[a, a])
iSa <- Sigma[b, a] %*% iSa
beta.j <- Sigma[b, b] - Sigma[b, a] %*% iSa %*% Sigma[a, b]
s2.j <- theta[b] + beta.j %*% (as.matrix(Y.full[i, a]) - theta[a])
theta.j <- rmvnorm(1, theta.j, s2.j)
Y.full[i, b]
}
}
### save results
<-rbind(THETA,theta) ; SIGMA<-rbind(SIGMA,c(Sigma))
THETA<-rbind(Y.MISS, Y.full[O==0] )
Y.MISS###
if(s %% 250 == 0 | s == 1) {
cat(s,theta,"\n")
} }
1 129.6031 71.66437 28.97947 31.385
250 123.6105 70.56862 29.10075 30.97481
500 123.6067 70.36211 29.20791 31.03185
750 123.6179 70.78825 29.29216 30.90945
1000 123.6017 70.15369 29.22685 31.08064
#### Posterior mean
apply(THETA,2,mean)
[1] 123.60189 70.57085 29.16132 31.79352
Effective sample size of THETA
# effective sample size of THETA
apply(THETA, 2, effectiveSize)
[1] 879.4569 3680.4806 4411.2164 7721.7664