# Introduction to Bayesian modeling

# Binomial probability distribution.
# Probability (likelihood) of observing S survivors in N individuals given survival probability theta
S=8
N=10
theta=0.76
# help(dbinom)
dbinom(x=S,size=N,prob=theta)

# Use R to calculate the likelihood for a series (a vector) of values for S at once
allS=0:N
like=dbinom(x=allS,size=N,prob=theta)
data.frame(allS,like)
x11(height=5.5,width=7,xpos=1200,ypos=1)
# windows(height=5.5,width=7,xpos=1200,ypos=1)
plot(allS,like,pch=16,main="binomial likelihood")
sum(like)

# Or calculate the log-likelihood, which is usually the currency in likelihood models
loglike=dbinom(x=allS,size=N,prob=theta,log=TRUE)
# data.frame(allS,loglike)
x11(height=5.5,width=7,xpos=1200,ypos=500)
# windows(height=5.5,width=7,xpos=1200,ypos=500)
plot(allS,loglike,pch=16,main="binomial log-likelihood")

# Consider again the data, S=8 survivors of N=10 individuals, and what the likelihood of observing this
# would be given various theta.
theta=0.76
dbinom(x=S,size=N,prob=theta)
theta=0.74
dbinom(x=S,size=N,prob=theta)
theta=0.72
dbinom(x=S,size=N,prob=theta)
theta=0.78
dbinom(x=S,size=N,prob=theta)
theta=0.82
dbinom(x=S,size=N,prob=theta)

# Take advantage of the vectorization again. This is the concept of the posterior distribution.
theta=seq(0,1,by=0.01)
postlike=dbinom(x=S,size=N,prob=theta)
dev.set(2)
plot(theta,postlike,pch=16,type="l",main="binomial posterior",ylab="prob")
abline(v=c(0.76,0.8,0.84))
dev.set(3)
logpostlike=dbinom(x=S,size=N,prob=theta,log=TRUE)
plot(theta,logpostlike,pch=16,type="l",main="binomial posterior (log-likelihood)",ylab="prob")
abline(v=c(0.76,0.8,0.84))

# Approximating the integral of the posterior 'distribution' is easy using sum, remembering to multiply by
# the width of the intervals in theta (0.01). The graph illustrates why this quick approximation to the
# integral works.
x11(height=8,width=11,xpos=700,ypos=1)
# windows(height=8,width=11,xpos=700,ypos=1)
plot(theta,postlike,pch=16,main="binomial posterior",ylab="prob")
abline(v=c(.595,.605))
abline(h=0.121,col="red")
sum(0.01*postlike)

# Dividing by the sum converts this to a true probability distribution (by assuring the integral is 1).
k=sum(0.01*postlike)
post=postlike/k
sum(0.01*post)

# Overlay the graphs of the adjusted posterior
dev.set(2)
lines(theta,post,type="l",col="red",main="adjusted binomial posterior",ylab="prob")
plot(theta,post,pch=16,type="l",col="red",main="adjusted binomial posterior",ylab="prob")
abline(v=c(0.76,0.8,0.84))
lines(theta,postlike)
dev.set(3)
logpost=logpostlike-log(k)
plot(theta,logpost,type="l",col="red",main="adjusted binomial posterior (log)",ylab="prob")
abline(v=c(0.76,0.8,0.84))
lines(theta,logpostlike)

# This probability distribution -- the posterior distribution -- is the beta distribution, with parameters
# S+1 and N-S+1.
# help(dbeta)
betaprob=dbeta(theta,shape1=S+1,shape2=N-S+1)
dev.set(2)
points(theta,betaprob)
logbetaprob=dbeta(theta,shape1=S+1,shape2=N-S+1,log=TRUE)
dev.set(3)
points(theta,logbetaprob)

# Since the beta distribution describes the posterior distribution, and it's a true probability distribution,
# it gives all the Bayesian results.
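# A quick numeric check (an added sketch, not part of the original workshop code): the grid-normalized
# posterior 'post' should closely match the closed-form Beta(S+1, N-S+1) density, and the posterior mean
# can be approximated on the grid as sum(width * theta * post).
max(abs(post-betaprob))   # should be small (grid approximation vs closed-form beta density)
sum(0.01*theta*post)      # grid approximation of the posterior mean; compare to the formula below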
# help(dbeta)
# Mean (this is the formula): (S+1)/(N+2)
# 95% credible intervals are where the cumulative probability reaches .025 and .975:
qbeta(p=c(0.025,0.975),shape1=S+1,shape2=N-S+1)
dev.set(2)
CI=qbeta(p=c(0.025,0.975),shape1=S+1,shape2=N-S+1)
plot(theta,betaprob,type="l",main="beta distribution (posterior)")
abline(v=CI,col="blue",lwd=2)

# Random draws from the posterior distribution:
rbeta(n=25,shape1=S+1,shape2=N-S+1)
dev.set(3)
randdraw=rbeta(n=10000,shape1=S+1,shape2=N-S+1)
hist(x=randdraw,breaks=theta)
quantile(x=randdraw,probs=c(0.025,0.975))
CIrand=quantile(x=randdraw,probs=c(0.025,0.975))
abline(v=CIrand,col="blue",lwd=2)

# Doing a random walk on the likelihood function is an alternative way to make random draws from the
# posterior. It does not require a closed form for the posterior; rather, it relies on the fact that the
# likelihood function is a constant times the posterior.
x11(height=8,width=11,xpos=700,ypos=1)
# windows(height=8,width=11,xpos=700,ypos=1)
# Here's the likelihood function, as a function of theta. It's a constant times the posterior (computed above).
plot(theta,postlike,type="l",main="binomial pseudo-posterior",ylab="prob")
# Create a Markov chain of values of theta. See the functions in RfuncBayesWorkshop.r.
showBinomialMCMC(thetastart=.2,S=8,N=10)
showBinomialMCMC(thetastart=.2,S=8,N=10,pause=FALSE,steps=100)
thetaMCMC=showBinomialMCMC(thetastart=.2,S=8,N=10,pause=FALSE,steps=1000,showstep=4000)
# thetaMCMC=showBinomialMCMC(thetastart=.2,S=8,N=10,pause=FALSE,steps=4000,showstep=4000)

# Compare to the histogram based on the beta distribution
x11(height=8,width=8,xpos=200,ypos=500)
hist(x=randdraw,breaks=theta)
# Compare quantiles of the beta draws and the MCMC chain
quantile(x=randdraw,probs=c(0.025,0.975))
quantile(thetaMCMC,probs=c(.025,.975))
graphics.off()

# Posterior distribution of a normal likelihood
# Assume a single observation y is drawn from a normal distribution whose SD is known to be 25 and whose
# mean is known to be 125. The likelihood function is the normal density.
y=110
mu=125
sigma=25
dnorm(x=y,mean=mu,sd=sigma)
dnorm(x=y,mean=mu,sd=sigma,log=TRUE)
allY=1:250
like=dnorm(x=allY,mean=mu,sd=sigma)
x11(height=5.5,width=7,xpos=1200,ypos=1)
# windows(height=5.5,width=7,xpos=1200,ypos=1)
plot(allY,like,pch=16,type="l",main="normal likelihood")
abline(v=mu)
sum(like)
# This is (approximately) the probability of observing any value y given mu, sigma, and the normal assumption.

# To continue with the ideas demonstrated for the binomial, it is far more interesting to consider a series
# of values y. Imagine, for instance, that 10 different animals were weighed and we are interested in
# estimating the mean and SD of the population by estimating posterior distributions.
sample=10
weights=rnorm(n=sample,mean=mu,sd=sigma)
# If the mean were 125 and the SD 25, the likelihood is
dnorm(x=weights,mean=mu,sd=sigma)
# There are 10 points, so 10 likelihoods, and it is easier to work with the log-likelihood from the start.
# We always need the sum of the log-likelihoods, which is the (log) likelihood of the entire set of data.
# The absolute probability is not useful.
loglike=dnorm(x=weights,mean=mu,sd=sigma,log=TRUE)
totalLogLike=sum(loglike)

# Just like for the binomial, the posterior distribution starts by expressing likelihoods as a function of
# the parameters. But with the normal, there are two parameters. To make graphs, fix one of the two
# parameters. Start by assuming that the SD is known (fixed) and examine a posterior distribution of the
# parameter mu.
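# The helpers calcNormalManyMu() and calcNormalManySD() used below are defined in RfuncBayesWorkshop.r
# (not shown here). As a rough sketch of the behavior assumed in this script -- the summed log-likelihood
# of all observations at each candidate parameter value, holding the other parameter fixed -- they could
# look something like the following (the workshop versions may differ in details):
calcNormalManyMu.sketch=function(data,mean,sd)
  sapply(mean,function(m) sum(dnorm(x=data,mean=m,sd=sd,log=TRUE)))
calcNormalManySD.sketch=function(data,mean,sd)
  sapply(sd,function(s) sum(dnorm(x=data,mean=mean,sd=s,log=TRUE)))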
allMu=1:250
postloglike=calcNormalManyMu(weights,mean=allMu,sd=sigma)
x11(height=5.5,width=7,xpos=1200,ypos=500)
# windows(height=5.5,width=7,xpos=1200,ypos=500)
plot(allMu,postloglike,type="l",pch=16,main="normal posterior likelihood (mean)")
plot(allMu,exp(postloglike),type="l",pch=16,main="normal posterior likelihood (mean)")
sum(exp(postloglike))
k=sum(exp(postloglike))
# The closed-form posterior for mu (SD known, flat prior) is normal with mean = sample mean and
# SD = sigma/sqrt(n); scale it by k to overlay on the unnormalized curve.
post=dnorm(allMu,mean=mean(weights),sd=sigma/sqrt(sample))
points(allMu,k*post)

# Repeat, but now vary the SD while fixing mu. I.e., the mean is known; examine the posterior distribution
# of the SD.
allSD=1:100
postloglike=calcNormalManySD(weights,mean=mu,sd=allSD)
x11(height=5.5,width=7,xpos=1200,ypos=500)
# windows(height=5.5,width=7,xpos=1200,ypos=500)
plot(allSD,postloglike,type="l",pch=16,main="normal posterior likelihood (SD)")
plot(allSD,exp(postloglike),type="l",pch=16,main="normal posterior likelihood (SD)")
sum(exp(postloglike))
k=sum(exp(postloglike))
graphics.off()

# Run a Gibbs sampler with Metropolis acceptance to calculate posterior distributions of mu and SD.
# The function normalPosterior.Gibbs is in RfuncBayesWorkshop.r.
GibbsResult=normalPosterior.Gibbs(data=weights,steps=20,showstep=20)
GibbsResult=normalPosterior.Gibbs(data=weights,steps=20,showstep=1,pause=FALSE)
GibbsResult

# Plot the results of a long run
GibbsResult=normalPosterior.Gibbs(data=weights,steps=4000,showstep=4000,pause=FALSE)
colMeans(GibbsResult)
x11(height=5.5,width=7,xpos=1200,ypos=1)
# windows(height=5.5,width=7,xpos=1200,ypos=1)
plot(GibbsResult$muMCMC)
x11(height=5.5,width=7,xpos=1200,ypos=500)
plot(GibbsResult$sdMCMC)

# Adjusting the step size works much better
GibbsResult=normalPosterior.Gibbs(data=weights,steps=4000,showstep=4000,pause=FALSE,adjuststep=TRUE)
colMeans(GibbsResult)
dev.set(2)
plot(GibbsResult$muMCMC)
dev.set(3)
plot(GibbsResult$sdMCMC)

# Show how the step sizes change
dev.set(2)
plot(GibbsResult$stepsizeMu)
dev.set(3)
plot(GibbsResult$stepsizeSD)

# Likelihood profiles
dev.set(2)
plot(GibbsResult$muMCMC,GibbsResult$llikeMu)
dev.set(3)
plot(GibbsResult$sdMCMC,GibbsResult$llikeSD)

# A "3D" likelihood profile: mu vs SD, with the log-likelihood shown by color
x11(height=8,width=10,xpos=500,ypos=1)
# windows(height=8,width=10,xpos=500,ypos=1)
maxllike=max(GibbsResult$llikeSD)
plot(GibbsResult$sdMCMC,GibbsResult$muMCMC,col="red",pch=1)
include=GibbsResult$llikeSD>(maxllike-4)
points(GibbsResult$sdMCMC[include],GibbsResult$muMCMC[include],col="lightgreen",pch=16)
include=GibbsResult$llikeSD>(maxllike-2)
points(GibbsResult$sdMCMC[include],GibbsResult$muMCMC[include],col="lightblue",pch=16)
include=GibbsResult$llikeSD>(maxllike-1)
points(GibbsResult$sdMCMC[include],GibbsResult$muMCMC[include],col="green",pch=16)
include=GibbsResult$llikeSD>(maxllike-.5)
points(GibbsResult$sdMCMC[include],GibbsResult$muMCMC[include],col="blue",pch=16)
include=GibbsResult$llikeSD>(maxllike-.25)
points(GibbsResult$sdMCMC[include],GibbsResult$muMCMC[include],col="black",pch=16)
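# For reference, here is a minimal sketch of a Metropolis-within-Gibbs sampler of the kind
# normalPosterior.Gibbs implements (an added illustration only; the workshop function in
# RfuncBayesWorkshop.r also plots, tracks log-likelihoods and step sizes, and can adapt the step size).
# Assumptions in this sketch: flat priors, symmetric uniform proposals, fixed step sizes.
normalGibbs.sketch=function(data,steps=4000,stepMu=5,stepSD=5) {
  loglik=function(m,s) sum(dnorm(x=data,mean=m,sd=s,log=TRUE))
  muCur=mean(data)
  sdCur=sd(data)
  muMCMC=sdMCMC=numeric(steps)
  for (i in 1:steps) {
    # Metropolis update of mu, holding the SD fixed
    muNew=muCur+runif(1,-stepMu,stepMu)
    if (log(runif(1)) < loglik(muNew,sdCur)-loglik(muCur,sdCur)) muCur=muNew
    # Metropolis update of the SD, holding mu fixed (proposals <= 0 are rejected)
    sdNew=sdCur+runif(1,-stepSD,stepSD)
    if (sdNew>0 && log(runif(1)) < loglik(muCur,sdNew)-loglik(muCur,sdCur)) sdCur=sdNew
    muMCMC[i]=muCur
    sdMCMC[i]=sdCur
  }
  data.frame(muMCMC,sdMCMC)
}
# Example use of the sketch (posterior means should be near those from normalPosterior.Gibbs):
# sketchResult=normalGibbs.sketch(data=weights)
# colMeans(sketchResult)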