#STA 370 Final #Lindsay Dever #Part 1 load('Dever.Rda') #Attempt 1 - Normal and Normal sample = vector(length = 10000) i = 1 while( i <= 5000){ draw = rnorm(1, 0, 1) if(draw >= 0){ sample[i] = draw i = i + 1 } } sample[5001:10000] = rnorm(100, 5, 1) par(mfrow = c(1,2)) sampleDensity = hist(dever1, plot = F)$density theta = seq(from = 0, to = 7, length.out = 1000) fx = dnorm(theta, 0, 1)+ .5*dnorm(theta, 5,1) hist(dever1, prob = TRUE, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Mixture of Normals vs. Original Sample') lines(theta, fx) hist(sample, prob = T, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Sample from Mixture of Normals') lines(theta, fx) #Attempt 2 Chi and Weibull sample = c(rchisq(10000, df = 1.5), rweibull(10000, shape = 8, scale = 5)) dev.new(width=10, height=7) par(mfrow = c(1,2)) theta = seq(from = 0, to = 8, length.out = 1000) fx = .5*dchisq(theta, df = 1.5) + .5* dweibull(theta, shape = 8, scale = 5) hist(dever1, prob = TRUE, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Mixture of Chi and Weibull vs. Original') lines(theta, fx) hist(sample, prob = T, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Sample from Mixture of Chi and Weibull') lines(theta, fx) #Attempt 3 – Weibull and weibull dev.new(width=10, height=7) par(mfrow = c(1,2)) sample = c(rweibull(5000, shape = 1, scale = 1), rweibull(5000, shape = 8, scale = 5)) theta = seq(from = 0, to = 8, length.out = 1000) fx = .5*dweibull(theta, shape = 1, scale =1 )+ .5*dweibull(theta, shape = 8, scale = 5) hist(dever1, prob = TRUE, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Mixture of Weibulls vs. Original Sample', breaks = 12) lines(theta, fx) hist(sample, prob = T, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Sample from Mixture of Weibulls') lines(theta, fx) cat("\nMean of Weibull sample: ", mean(sample)) cat("\nSD of Weibull sample: ", sd(sample), '\n') #Part 2 #Adapted from PSET 10 solution, metrBl.R #Designed for this specific problem only #Will sample 1 independently, 2 and 3 dependently, and 4 and 5 dependently Gibbs <-function(y, covar, mu, steps = 1000){ require(MASS) if(steps<100){ warning("Function should take at least 100 steps") } # d is number of parameters to estimate here # so number of dimensions of multivariate normal # n is the number of observations d = 5 n <- length(y[,1]) pi = 3.14159265 # this is to make sure covariance is not singular # which would mean likelihood could not be # calculated if (is.nan(solve(covar)[1,1])) { stop("covariance is singular") } # here we get the inverse of the covariance matrix Einv <- solve(covar) sigmaC <- covar sigmaCinv <- solve(sigmaC) #Standard deviation of each dimension sds = vector(length = 5) for(i in 1:5){ sds[i] = sqrt(sigmaC[i,i]) } rho1 = sigmaC[2,3]/(sds[2]*sds[3]) rho2 = sigmaC[4,5]/(sds[4]*sds[5]) # this is where we will keep our samples # so rows are samples and columns for each dimension targetSample <- matrix(nrow=steps, ncol=d) # the first sample is at the candidate means targetSample[1,] <- mu x <- mu for(i in 2:steps){ #Sample from 1 indepedently targetSample[i,1] = rnorm(1, mu[1], sds[1]) #Draw from 2 based on the previous value of 3 m2 <- mu[2] + (sds[2]/sds[3])*rho1*(targetSample[i-1,3]-mu[3]) s2 <- (sqrt(1-rho1^2)*sds[2]) targetSample[i,2] = rnorm(1,m2,s2) #Draw from 3 based on the current value of 2 m3 = mu[3]+ (sds[3]/sds[2])*rho1*(targetSample[i,2] - mu[2]) s3 = (sqrt(1-rho1^2)*sds[3]) targetSample[i,3] = rnorm(1,m3,s3) #Draw from 4 based on the previous value of 5 m4 <- mu[4] + (sds[4]/sds[5])*rho2*(targetSample[i-1,5]-mu[5]) s4 <- (sqrt(1-rho2^2)*sds[4]) targetSample[i,4] = rnorm(1,m4,s4) #Draw from 5 based on the current value of 4 m5 = mu[5]+ (sds[5]*rho2/sds[4])*(targetSample[i,4] - mu[4]) s5 = (sqrt(1-rho2^2)*sds[5]) targetSample[i,5] = rnorm(1,m5,s5) } return(targetSample) } covar = matrix(nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ if(i != j){ covar[i,j] = var(data2[,i],data2[,j])} else{ covar[i,j] = var(data2[,i]) } } } mu = vector() muS = vector() for(i in 1:5){ mu[i] = mean(data2[,i])} Samp = Gibbs(data2, covar, mu, steps = 50000) covarS = matrix(nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ if(i != j){ covarS[i,j] = var(Samp[,i],Samp[,j])} else{ covarS[i,j] = var(Samp[,i]) } } } for(i in 1:5){ muS[i] = mean(Samp[,i])} #Plot the original sample and the Gibbs sample dev.new(width=10, height=7) par(mfrow = c(1,3)) plot(Samp[,1], Samp[,2], pch = '.', col = 1, main = "Dimensions 1 and 2", xlab = "Dimension 1", ylab = "Dimension 2") points(data2[,1], data2[,2], pch = 20, col = 4) plot(Samp[,2], Samp[,3], pch = '.', col = 1, main = "Dimensions 2 and 3", xlab = "Dimension 2", ylab = "Dimension 3") points(data2[,2], data2[,3], pch = 20, col = 4) plot(Samp[,4], Samp[,5], pch = '.', col = 1, main = "Dimensions 4 and 5", xlab = "Dimension 4", ylab = "Dimension 5") points(data2[,4], data2[,5], pch = 20, col = 4) #Create histograms of each marginal sample sds = vector(length = 5) for(i in 1:5){ sds[i] = sqrt(covar[i,i]) } theta = matrix(nrow = 5000, ncol = 5) sds = vector(length = 5) for(i in 1:5){ sds[i] = sqrt(covar[i,i]) } for(i in 1:5){ theta[,i] = seq(mu[i] - 3*sds[i], mu[i] + 3*sds[i], length.out = 5000)} dev.new(width=10, height=7) par(mfrow = c(2,3)) for(i in 1:5){ hist(Samp[,i], main = paste("Distribution ", i), prob = T, xlab = paste("Distribution ",i), col = 'steelblue2') lines(theta[,i], dnorm(theta[,i], mu[i], sds[i]))} #print covariance matrices for(i in 1:5){ muS[i] = mean(Samp[,i])} rn = c("Dim 1", "Dim 2", "Dim 3", "Dim 4", "Dim 5") dimnames(covarS) = list(rn, rn) dimnames(covar) = list(rn, rn) print(round(covar, 3)) cat("\n") print(round(covarS,3)) cat("\nMean of original: ", mu, "\n") cat("Mean of sample: ", muS) #Find bootstrap estimates of mean and standard deviation bootM = bootSD = matrix(nrow = 1000, ncol = 5) for(i in 1:1000){ samp = Gibbs(data2, covar, mu, steps = 500) for(j in 1:5){ bootM[i,j] = mean(samp[,j]) bootSD[i,j] = sd(samp[,j]) } } for(j in 1:5){ cat(paste('\nCredible intervals for mean and sd for Dimension ', j, '\n')) print(quantile(bootM[,j], c(.025,.975))) print(quantile(bootSD[,j], c(.025, .975))) }