Solved – jags missing data error

bayesianjagsr

I am estimating a probit in jags on a time series cross sectional data set (country years) with 5 covariates, 4 of which are lagged, so the first observation for each country will be missing on those 4 variables. There is some scattered missingness elsewhere in the data, but nothing too bad. All of the variables are of numeric type, and missing values are coded as 'NA'. The model works fine when I omit all missing values, but when I don't I get the error

Compiling model graph
Resolving undeclared variables
Allocating nodes
Deleting model
Error in jags.model("model.bug", data.jags, n.chains = 4, n.adapt = 1000) : 
RUNTIME ERROR:
Compilation error on line 4.
Unable to resolve node Xbeta[1]
This may be due to an undefined ancestor node or a directed cycle in the graph

Here is my data loading code

data.all <- read.csv("http://dl.dropbox.com/u/1253073/blowback-all.csv")

data.jags <- list('onset.b.lag' = data.all$onset.b.lag,
                  'ongoing' = data.all$ongoing,
                  'peace' = data.all$peace,
                  'ln.gdppc.lag' = data.all$ln.gdppc.lag,
                  'all.lag' = data.all$all.lag,
                  'onset' = data.all$onset,
                  'n' = nrow(data.all))

rjags call:

mod.all <- jags.model('model.bug', data.jags, n.chains = 4, n.adapt = 1000)

Here is my model code:

model{

for (i in 1:n) {
probit(p[i]) <- Xbeta[i]
onset[i] ~ dbern(p[i])
Xbeta[i] <- b.0 +
            b.onset.b.lag * onset.b.lag[i] +
            b.peace * peace[i] +
    b.ln.gdppc.lag * ln.gdppc.lag[i] +
    b.ongoing * ongoing[i] +
    b.all.lag * all.lag[i]
}

b.0 ~ dnorm(0, .0001)
b.peace ~ dnorm(0, .0001)
b.onset.b.lag ~ dnorm(0, .0001)
b.ln.gdppc.lag ~ dnorm(0, .0001)
b.ongoing ~ dnorm(0, .0001)
b.all.lag ~ dnorm(0, .0001)

}

My understanding is that when jags encounters an index where a variable is coded NA it takes a draw from the posterior. Any hints would be appreciated.

Best Answer

My understanding is that if the outcome is NA then it will fill it in from the posterior predictive. NA in the predictors is not allowed, and must be imputed.

Related Solutions

Solved – Jags error with dgamma

I have received these errors when not using appropriate initial values for my parameters. Rather than writing functions to generate initial values (which I presume you have done), I would provide starting values that you know are consistent with one another.

Solved – Compilation error in JAGS

I took out the nested loop to make it clearer for me and stuck the prior for delta in its own loop, giving this for mu: delta[i] -> delta[id[r]]
note the explicit id vector for municipalities that is as long as the data which I've labelled 'id'

I think that's the biggest change I made? Model below (also some extras in case you want to include them).

require(rjags)
y   <- c(1.105808,    1.000000,    5.304166,   33.665875,  139.865451, 109.033703,  176.639245,    1.000000,   28.521134,   44.281121 ,150.478570,   18.465554,   85.096431,   81.907537,  124.631226, 1.000000 ,  11.237294 ,  20.480519  , 68.642176 ,  30.047630,  54.051613 , 134.068889  , 72.215041 ,   1.000000 ,  31.254480,   6.226026  , 54.340496 , 161.667352 ,1345.948800 , 147.404744,  192.966923   , 1.000000  ,  1.150755 ,   1.000000  , 99.477430,  72.592598,  107.493014 , 130.201416 , 147.387423 ,   1.000000,   27.534944  ,  8.492657 ,  38.155558,   44.301978 ,  81.938633,   75.026848  ,144.926523 ,   1.000000)
x1  <- c(7.878865,   9.117159 ,  9.998539 , 10.300563 , 12.683197 , 12.185060, 101.346385, 168.814861,   4.769977,   4.803769,   5.990192,   6.469412,   7.557664,   9.781595, 264.102447 ,702.321019 ,  5.663501 ,  6.319843,   6.643405 ,  7.147517 ,  8.154099 ,  8.811370 , 64.089236, 205.728163,   7.218225 ,  6.905615 ,  9.341990 ,  8.554343 , 15.873037 , 15.731554, 227.589294 ,398.435765 ,  6.217681 , 10.498929 , 11.088663 , 10.312797,  11.483123   ,9.276521 ,157.311069 ,391.279665 ,  3.985544 ,  4.389385,  4.663445 ,  4.934453  , 6.622184  , 7.770833 , 21.700911 , 33.470576)
x2  <- c(0.4225000, 0.6619411 ,0.5401000, 0.5138000, 0.5109000, 0.6325681, 0.6425919, 0.6466943, 0.4421000, 0.5870430, 0.5254000, 0.5525000, 0.5392000, 0.6330954, 0.6457942, 0.6582531, 0.4039000, 0.6154006, 0.4620000, 0.4875000, 0.5439000, 0.6494275, 0.6423681, 0.6450814, 0.5110000, 0.6060935, 0.5050000, 0.5200000, 0.5535000, 0.6383363, 0.6684222 ,0.6465682, 0.3963000, 0.5914320, 0.4819000, 0.5361000, 0.5886000, 0.6140674 ,0.6240171, 0.6150484, 0.4797000, 0.6211242, 0.5705000, 0.5709000, 0.6144000 ,0.6412593, 0.6611542, 0.6364444)
id  <- rep(1:6, each=8)
df1 <- data.frame(x1, x2, y, id)
x   <- cbind(x1, x2)

# Data, Initial Values, and Parameters
N     <- length(unique(id))
W     <- dim(x)[2]
data  <- list(Ndata = length(y), N=N, M=6, W=W, y=y, x=x,id=id, m=c(-2,2), prec=c(.2,.2), tau.a=1, tau.b=2, tau.a2=1, tau.b2=2, mbeta0=1, precbeta0=.01)
inits <- list(beta0=0, beta=c(1,1), tau=1)
param <- c("beta0", "beta", "sigma", "delta")

# THE MODEL.
modelstring = "
model {
    for( r in 1 : Ndata ) {
        y[r] ~ dlnorm( mu[r] , tau ) #tau[subj[r]]
        mu[r] <- beta0 + delta[ id[r] ] + inprod(beta[] , x[r,]) #b1[ subj[r] ] * x1[r] + b2[ subj[r] ] * x2[r] - for subject varying slopes
    }
        beta0 ~ dnorm(mbeta0, precbeta0)
    for ( s in 1 : N ) {
        delta[s]  ~ dnorm(0, tau2)
#        b1[s]     ~ dnorm(m[l], prec[l]) #for subject varying slopes that do not correlate with the intercept
#        b2[s]     ~ dnorm(m[l], prec[l]) 
#        tau[s]    ~ dgamma( sG , rG ) #for subject varying tau in the likelihood
    }
    for (l in 1:W) {
        beta[l] ~ dnorm(m[l], prec[l])
    } 
    tau ~ dgamma(tau.a, tau.b)   
    sigma <- 1/sqrt(tau)
    tau2 ~ dgamma(tau.a2, tau.b2)
    sigma2 <- 1/sqrt(tau2)
}
" # close quote for modelstring
writeLines(modelstring,con="model.txt")

adaptSteps = 500              # Number of steps to "tune" the samplers.
burnInSteps = 500             # Number of steps to "burn-in" the samplers.
nChains = 3                   # Number of chains to run.
numSavedSteps=50000           # Total number of steps in chains to save.
thinSteps=1                   # Number of steps to "thin" (1=keep every step).
nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.

jagsModel = jags.model( "model.txt" , data=data , inits=inits , 
                    n.chains=nChains , n.adapt=adaptSteps )
cat( "Burning in the MCMC chain...\n" )
update( jagsModel , n.iter=burnInSteps )
cat( "Sampling final MCMC chain...\n" )
codaSamples = coda.samples( jagsModel , variable.names=param , 
                        n.iter=nPerChain , thin=thinSteps )
mcmcChain1 = as.matrix( codaSamples )
apply(mcmcChain1, 2, summary)

# check with lmer
require(lme4)
yl=log(y)
lmerx <- lmer(yl ~ x1 + x2 + (1|id))
lmerx

## User1172558 Model
modelstring = " 
    model {
    for(i in 1:N) {
        for (j in 1:M) {
            y[(i-1)*8+j] ~ dlnorm(mu[(i-1)*8+j], tau)
            mu[(i-1)*8+j] <- beta0 + delta[i] + inprod( beta[], x[(i-1)*8+j , ])
        }
        delta[i] ~ dnorm(0, tau2)
    }

    # Prior for betas
    beta0 ~ dnorm(mbeta0, precbeta0)
    for (l in 1:W) {
        beta[l] ~ dnorm(m[l], prec[l])
    }

    # Prior for precision of Y
    tau ~ dgamma(tau.a, tau.b)
    sigma <- 1/sqrt(tau)

    # Prior for precision of Delta
    tau2 ~ dgamma(tau.a2, tau.b2)
    sigma2 <- 1/sqrt(tau2)
}
" 
writeLines(modelstring, con="model.txt")
# Create, initialize, and adapt the model:
jagsModel = jags.model( "model.txt" , data=data , inits=inits , 
                    n.chains=nChains , n.adapt=adaptSteps )
update( jagsModel , n.iter=burnInSteps )
codaSamples = coda.samples( jagsModel , variable.names=param , 
                        n.iter=nPerChain , thin=thinSteps )
mcmcChain2 = as.matrix( codaSamples )
apply(mcmcChain2, 2, summary)
lmerx

require(MCMCglmm) #Check with MCMCglmm
mc1 <- MCMCglmm(yl ~ x1 + x2, random=~id, data=df1)
summary(mc1); lmerx; apply(mcmcChain1, 2, summary); apply(mcmcChain2, 2, summary)

Edit: Included full script.
My version seems to be more consistent with lmer and MCMCglmm. So I'm not so sure what the difference is between your nested loop for the municipality level intercept and my method of doing it. Did you model your code from somewhere? I would also consider weakening the priors, unless you have a strong reason for them.

Best Answer

Related Solutions

Solved – Jags error with dgamma

Solved – Compilation error in JAGS

Related Question