#STA 370 Final
#Lindsay Dever
#Part 1

load('Dever.Rda')

#Attempt 1 - Normal and Normal
sample = vector(length = 10000)
i = 1
while( i <= 5000){
	draw = rnorm(1, 0, 1)
	if(draw >= 0){
		sample[i] = draw
		i = i + 1
	}
}
sample[5001:10000] = rnorm(100, 5, 1)

par(mfrow = c(1,2))
sampleDensity = hist(dever1, plot = F)$density
theta = seq(from = 0, to = 7, length.out = 1000)
fx = dnorm(theta, 0, 1)+ .5*dnorm(theta, 5,1)
hist(dever1, prob = TRUE, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Mixture of Normals vs. Original Sample')
lines(theta, fx)
hist(sample, prob = T, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Sample from Mixture of Normals')
lines(theta, fx)

#Attempt 2 Chi and Weibull
sample = c(rchisq(10000, df = 1.5), rweibull(10000, shape = 8, scale = 5))
dev.new(width=10, height=7)
par(mfrow = c(1,2))
theta = seq(from = 0, to = 8, length.out = 1000)
fx = .5*dchisq(theta, df = 1.5) + .5* dweibull(theta, shape = 8, scale = 5)
hist(dever1, prob = TRUE, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Mixture of Chi and Weibull vs. Original')
lines(theta, fx)
hist(sample, prob = T, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Sample from Mixture of Chi and Weibull')
lines(theta, fx)


#Attempt 3 – Weibull and weibull
dev.new(width=10, height=7)
par(mfrow = c(1,2))
sample = c(rweibull(5000, shape = 1, scale = 1), rweibull(5000, shape = 8, scale = 5))
theta = seq(from = 0, to = 8, length.out = 1000)
fx = .5*dweibull(theta, shape = 1, scale =1 )+ .5*dweibull(theta, shape = 8, scale = 5)
hist(dever1, prob = TRUE, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Mixture of Weibulls vs. Original Sample', breaks = 12)
lines(theta, fx)
hist(sample, prob = T, xlim = range(theta), ylim = c(0, max(sampleDensity)), main = 'Sample from Mixture of Weibulls')
lines(theta, fx)
cat("\nMean of Weibull sample: ", mean(sample))
cat("\nSD of Weibull sample: ", sd(sample), '\n')


#Part 2
#Adapted from PSET 10 solution, metrBl.R
#Designed for this specific problem only
#Will sample 1 independently, 2 and 3 dependently, and 4 and 5 dependently
Gibbs <-function(y, covar, mu, steps = 1000){
require(MASS)
if(steps<100){
warning("Function should take at least 100 steps")
}
# d is number of parameters to estimate here
# so number of dimensions of multivariate normal
# n is the number of observations
d = 5
n <- length(y[,1])
pi = 3.14159265
# this is to make sure covariance is not singular
# which would mean likelihood could not be
# calculated
if (is.nan(solve(covar)[1,1])) {
stop("covariance is singular")
}
# here we get the inverse of the covariance matrix
Einv <- solve(covar)
sigmaC <- covar
sigmaCinv <- solve(sigmaC)
#Standard deviation of each dimension
sds = vector(length = 5)
for(i in 1:5){
	sds[i] = sqrt(sigmaC[i,i])
}
rho1 = sigmaC[2,3]/(sds[2]*sds[3])
rho2 = sigmaC[4,5]/(sds[4]*sds[5])
# this is where we will keep our samples
# so rows are samples and columns for each dimension
targetSample <- matrix(nrow=steps, ncol=d)
# the first sample is at the candidate means
targetSample[1,] <- mu
x <- mu
for(i in 2:steps){
	#Sample from 1 indepedently
	targetSample[i,1] = rnorm(1, mu[1], sds[1])

	#Draw from 2 based on the previous value of 3
	m2 <- mu[2] + (sds[2]/sds[3])*rho1*(targetSample[i-1,3]-mu[3])
	s2 <- (sqrt(1-rho1^2)*sds[2])	
	targetSample[i,2] = rnorm(1,m2,s2)
	
	#Draw from 3 based on the current value of 2
	m3 = mu[3]+ (sds[3]/sds[2])*rho1*(targetSample[i,2] - mu[2])
	s3 = (sqrt(1-rho1^2)*sds[3])
	targetSample[i,3] = rnorm(1,m3,s3)
	
	#Draw from 4 based on the previous value of 5
	m4 <- mu[4] + (sds[4]/sds[5])*rho2*(targetSample[i-1,5]-mu[5])
	s4 <- (sqrt(1-rho2^2)*sds[4])		
	targetSample[i,4] = rnorm(1,m4,s4)
	
	#Draw from 5 based on the current value of 4
	m5 = mu[5]+ (sds[5]*rho2/sds[4])*(targetSample[i,4] - mu[4])
	s5 = (sqrt(1-rho2^2)*sds[5])
	targetSample[i,5] = rnorm(1,m5,s5)	
}
return(targetSample)
}

covar = matrix(nrow = 5, ncol = 5)
for(i in 1:5){
for(j in 1:5){ 
if(i != j){
covar[i,j] = var(data2[,i],data2[,j])}
else{
covar[i,j] = var(data2[,i])
}
	}
}
mu = vector()
muS = vector()
for(i in 1:5){
mu[i] = mean(data2[,i])}
Samp = Gibbs(data2, covar, mu, steps = 50000)
covarS = matrix(nrow = 5, ncol = 5)
for(i in 1:5){
for(j in 1:5){ 
if(i != j){
covarS[i,j] = var(Samp[,i],Samp[,j])}
else{
covarS[i,j] = var(Samp[,i])
}
	}
}
for(i in 1:5){
muS[i] = mean(Samp[,i])}

#Plot the original sample and the Gibbs sample
dev.new(width=10, height=7)
par(mfrow = c(1,3))
plot(Samp[,1], Samp[,2], pch = '.', col = 1, main = "Dimensions 1 and 2", xlab = "Dimension 1", ylab = "Dimension 2")
points(data2[,1], data2[,2], pch = 20, col = 4)
plot(Samp[,2], Samp[,3], pch = '.', col = 1, main = "Dimensions 2 and 3", xlab = "Dimension 2", ylab = "Dimension 3")
points(data2[,2], data2[,3], pch = 20, col = 4)
plot(Samp[,4], Samp[,5], pch = '.', col = 1, main = "Dimensions 4 and 5", xlab = "Dimension 4", ylab = "Dimension 5")
points(data2[,4], data2[,5], pch = 20, col = 4)

#Create histograms of each marginal sample
sds = vector(length = 5)
for(i in 1:5){
	sds[i] = sqrt(covar[i,i])
}
theta = matrix(nrow = 5000, ncol = 5)
sds = vector(length = 5)
for(i in 1:5){
 	sds[i] = sqrt(covar[i,i])
}
for(i in 1:5){
theta[,i] = seq(mu[i] - 3*sds[i], mu[i] + 3*sds[i], length.out = 5000)}
dev.new(width=10, height=7)
par(mfrow = c(2,3))
for(i in 1:5){
hist(Samp[,i], main = paste("Distribution ", i), prob = T, xlab = paste("Distribution ",i), col = 'steelblue2')
lines(theta[,i], dnorm(theta[,i], mu[i], sds[i]))}

#print covariance matrices
for(i in 1:5){
muS[i] = mean(Samp[,i])}
rn = c("Dim 1", "Dim 2", "Dim 3", "Dim 4", "Dim 5")
dimnames(covarS) = list(rn, rn)
dimnames(covar) = list(rn, rn)
print(round(covar, 3))
cat("\n")
print(round(covarS,3))
cat("\nMean of original: ", mu, "\n")
cat("Mean of sample: ", muS)

#Find bootstrap estimates of mean and standard deviation
bootM = bootSD = matrix(nrow = 1000, ncol = 5)
for(i in 1:1000){
	samp = Gibbs(data2, covar, mu, steps = 500)
	for(j in 1:5){		
		bootM[i,j] = mean(samp[,j])
		bootSD[i,j] = sd(samp[,j])
	}
}
for(j in 1:5){
	cat(paste('\nCredible intervals for mean and sd for Dimension ', j, '\n'))
	print(quantile(bootM[,j], c(.025,.975)))
	print(quantile(bootSD[,j], c(.025, .975)))
}