Add files via upload
lopesoll committed Mar 6, 2023
1 parent 7a7c542 commit 4747aeceb72cf03df70179fc4f03c0d0b062be7a
Showing 8 changed files with 7,453 additions and 0 deletions.
@@ -0,0 +1,66 @@
#time series plots
plot(Dataframe$time, Dataframe$x1, type = "n", xlim = c(30, 50), ylim = c(-5, 4), xlab="Time", ylab="EEG Signals")
lines(Dataframe$time, Dataframe$x1, col="blue", lwd=1)
lines(Dataframe$time, Dataframe$x2, col="red", lwd=1)

plot(Dataframe$time, Dataframe$x1, type = "n", xlim = c(50, 54), ylim = c(-5, 3.5), xlab="Time", ylab="EEG Signals")
lines(Dataframe$time, Dataframe$x1, col="blue", lwd=2)
lines(Dataframe$time, Dataframe$x2, col="red", lwd=2)
lines(Dataframe$time, Dataframe$y, col="purple", lwd=1)


plot(Dataframe$time, Dataframe$y, type = "l", xlim = c(30, 50), ylim = c(-50, 50), col="purple", xlab="Time", ylab="Audio input")

#distribution of the signals
hist(Dataframe$x1, col="blue", main = "Distribution of X1", xlab="", ylab="Frequency")
hist(Dataframe$x2, col="red", main = "Distribution of X2", xlab="", ylab="Frequency")
hist(Dataframe$y, col="purple", main = "Distribution of Y", xlab="", ylab="Frequency")


#variance
print(var(Dataframe$x1))
print(var(Dataframe$x2))
print(var(Dataframe$y))
#mean
print(mean(Dataframe$x1))
print(mean(Dataframe$x2))
print(mean(Dataframe$y))
#standard deviation
print(sd(Dataframe$x1))
print(sd(Dataframe$x2))
print(sd(Dataframe$y))
#correlation
print(cor(Dataframe$x1, Dataframe$y))
print(cor(Dataframe$x2, Dataframe$y))
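
#equivalent one-call alternative (an optional sketch, assuming the columns are named x1, x2 and y):
#sapply() computes the same variance/mean/sd summary for all three signals at once
print(sapply(Dataframe[, c("x1", "x2", "y")], function(v) c(var = var(v), mean = mean(v), sd = sd(v))))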

#scatter plot with linear model
plot(Dataframe$x1, Dataframe$y, xlab = "X1", ylab = "Y")#positive correlation
abline(lm(Dataframe$y ~ Dataframe$x1), col = "red")

plot(Dataframe$x2, Dataframe$y, xlab = "X2", ylab = "Y") #no correlation
abline(lm(Dataframe$y ~ Dataframe$x2), col = "blue")
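
#optional: summary(lm(...))$r.squared quantifies the strength of each linear fit,
#which the scatter plots above only show qualitatively (a sketch on the same data)
print(summary(lm(Dataframe$y ~ Dataframe$x1))$r.squared)
print(summary(lm(Dataframe$y ~ Dataframe$x2))$r.squared)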

#box plots
library(ggplot2)
#name the columns explicitly so the aes() mappings below can refer to x1, x2 and y
df = data.frame(x1 = Dataframe$x1, x2 = Dataframe$x2, y = Dataframe$y)

# Plot the box plot
ggplot(data = df, aes(x = "x1", y = x1)) +
geom_boxplot() +
xlab("EEG Signal") +
ylab("Value") +
ggtitle("Boxplot of EEG Signal x1")

ggplot(data = df, aes(x = "x2", y = x2)) +
geom_boxplot() +
xlab("EEG Signal") +
ylab("Value") +
ggtitle("Boxplot of EEG Signal x2")

ggplot(data = df, aes(x = "y", y = Dataframe.y)) +
geom_boxplot() +
xlab("EEG Signal") +
ylab("Value") +
ggtitle("Boxplot of Sound y")
@@ -0,0 +1,53 @@
set.seed(123)
#split the data: 70% for training and 30% for testing
data = sort(sample(nrow(Dataframe), nrow(Dataframe) * 0.7))
train_data = Dataframe[data, ]
test_data = Dataframe[-data, ]

#task 2.7.1
#create the theta bias (intercept) column: a 1 for every training observation
const = rep(1, times=length(train_data$x1))

#create the theta bias (intercept) column for the testing data
const_test = rep(1, times=length(test_data$x1))

#create the model
model3=cbind((train_data$x1^3), train_data$x2, train_data$x1, const)
model3_test_data=cbind((test_data$x1^3), test_data$x2, test_data$x1, const_test)

#estimate model parameters using least squares (training data)
thetahat=solve(t(model3) %*% model3) %*% t(model3) %*% train_data$y
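
#optional sanity check (a sketch, assuming the same training split): the
#closed-form estimate above should match R's built-in least-squares fit;
#note that lm() lists the intercept first, so the coefficient order differs
fit_check = lm(y ~ I(x1^3) + x2 + x1, data = train_data)
print(coef(fit_check)) # compare against thetahat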

#compute model prediction using test data
y_pred=model3_test_data %*% thetahat

#build the 95% confidence intervals for the predictions
residuals = test_data$y - y_pred
rss = sum(residuals^2)
n = length(test_data$y)
#calculate variance of the residuals
sigma_squared = rss/(n-1)
#compute the estimated covariance matrix of the model parameters
cov_thetaHat = sigma_squared * (solve(t(model3_test_data) %*% model3_test_data))

var_y_hat = matrix(0, n, 1)

#compute the variance of y_pred
for (i in 1:n) {
  X_i = matrix(model3_test_data[i, ], 1, 4) # X[i,] drops to a vector; convert it back to a 1x4 matrix
  var_y_hat[i, 1] = X_i %*% cov_thetaHat %*% t(X_i)
}
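
#equivalent vectorised form (optional): the loop computes the diagonal of
#X %*% cov_thetaHat %*% t(X); rowSums avoids forming the full n x n matrix
var_y_hat_vec = rowSums((model3_test_data %*% cov_thetaHat) * model3_test_data)
stopifnot(all.equal(as.numeric(var_y_hat), var_y_hat_vec))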

#gather the variables needed for the ordered plot
CI = 2 * sqrt(var_y_hat) # approximate 95% confidence interval half-width (z of 1.96, rounded to 2)
y = test_data$y
time = test_data$time

#create dataframe and order it to plot
data = data.frame(y, y_pred, CI, time)
data = data[order(data$time), ]

#plot prediction, testing data, and confidence intervals
plot(data$time, data$y_pred, type = "o", xlim = c(41, 47), ylim = c(-1, 1), col="steelblue1", xlab="Time", ylab="Y", lwd=2)
points(data$time, data$y, col="forestgreen", lwd=2)
segments(data$time, data$y_pred - data$CI, data$time, data$y_pred + data$CI, col="red1", lwd=3)
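
#optional: label the three layers of the plot above with a legend
legend("topright", legend = c("Prediction", "Testing data", "95% CI"),
       col = c("steelblue1", "forestgreen", "red1"),
       lty = c(1, NA, 1), pch = c(1, 1, NA), lwd = c(2, 2, 3))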
@@ -0,0 +1,63 @@
set.seed(123)

#response vector and the theta bias (intercept) column
y = Dataframe$y
constant = rep(1, times = length(y))

#split X into X1 and X2
X1=Dataframe[,1]
X2=Dataframe[,2]

#create the models
model1=cbind((X1^3), (X2^5), constant)
model2=cbind((X1^4), (X2^2), constant)
model3=cbind((X1^3), X2, X1, constant)
model4=cbind(X1, (X1^2),(X1^3), (X2^3), constant)
model5=cbind((X1^3), (X1^4), X2, constant)

for (i in 1:5) {
  #get the model for every index: (model + i)
  var_name = paste0("model", i)
  my_model = get(var_name)

  #estimate model parameters using least squares (task 2.1)
  thetahat = solve(t(my_model) %*% my_model) %*% t(my_model) %*% y
  #print(thetahat) #print thetahat to include in the report

  #compute the model's predictions
  y_pred = my_model %*% thetahat

  #compute the model residuals (errors) and the residual sum of squares (task 2.2)
  residuals = y - y_pred
  rss = sum(residuals^2)
  print(paste0("--------------------- Model ", i, " -----------------------"))
  print(paste0("RSS for model ", i, ": ", rss))

  #compute the log-likelihood function (task 2.3)
  n = length(y)
  sigma_squared = rss / (n - 1)
  loglikelihood = -(n/2) * log(2*pi) - (n/2) * log(sigma_squared) - (1/(2*sigma_squared)) * rss
  print(paste0("log likelihood for model ", i, ": ", loglikelihood))

  #define k, the number of estimated parameters in each model, for AIC and BIC:
  #k is 4 in models 1 and 2, 5 in models 3 and 5, and 6 in model 4
  if (i == 1 || i == 2) {
    k = 4
  } else if (i == 4) {
    k = 6
  } else {
    k = 5
  }
  #compute AIC (task 2.4)
  AIC = 2 * k - 2 * loglikelihood
  print(paste0("AIC for model ", i, ": ", AIC, " info: k is equal to: ", k))

  #compute BIC
  BIC = log(n) * k - 2 * loglikelihood
  print(paste0("BIC for model ", i, ": ", BIC))

  #check whether the residuals are close to normal/Gaussian (task 2.5)
  qqnorm(residuals, main = paste("Q-Q Plot for Model", i))
  qqline(residuals)
  hist(residuals, col="limegreen", main = paste("Distribution of the residuals for model", i), xlab="Model prediction errors", ylab="Frequency")
  print(shapiro.test(residuals))
}
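
#optional cross-check (a sketch for model 3 only): R's built-in AIC/BIC on an
#lm fit should land close to the hand-computed values above; small differences
#are expected because logLik.lm uses the ML variance estimate rss/n rather than
#the rss/(n-1) used here
fit3 = lm(y ~ I(X1^3) + X2 + X1)
print(AIC(fit3))
print(BIC(fit3))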
@@ -0,0 +1,58 @@
set.seed(123)
y = Dataframe$y
constant = rep(1, times = length(y))
n = length(y)
X1 = Dataframe$x1
X2 = Dataframe$x2

model_matrix = cbind((X1^3), X2, X1, constant)

# Use a Uniform distribution as prior for these two parameters, centred on their least-squares estimates with width +-1
prior1 = runif(10000, 4.181390 - 1, 4.181390 + 1) # with the width set to 1, accepted theta3 values spanned roughly +-0.57 around the estimate
prior2 = runif(10000, -6.651400 - 1, -6.651400 + 1)

# Create empty array to store parameters
accepted_params = matrix(nrow = 0, ncol = 3)

num_iter = 100000
tolerance = 0.018

for (i in 1:num_iter) {
  # Draw parameters from the priors
  theta3 = sample(prior1, 1)
  thetaBias = sample(prior2, 1)

  # Fix the other two parameters at their least-squares estimates
  thetahat = c(2.715713, -3.151350, theta3, thetaBias)
  theta = matrix(thetahat)

  # Compute simulated data using the model
  y_sim = model_matrix %*% theta

  # Compute the distance between simulated and observed data
  dist = (sqrt(sum((y_sim - y)^2))) / n

  #print(dist) # uncomment to inspect distances when tuning the tolerance
  # Accept or reject the parameter values based on the distance
  if (dist < tolerance) {
    accepted_params = rbind(accepted_params, c(theta3, thetaBias, dist))
  }
}
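
# Optional diagnostic: report the acceptance rate; a very low rate suggests
# widening the tolerance or priors, a very high one suggests tightening them
print(paste0("Accepted ", nrow(accepted_params), " of ", num_iter, " draws (",
             round(100 * nrow(accepted_params) / num_iter, 2), "%)"))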

# Plot joint posterior distribution
library(ggplot2)

# Extract accepted parameters
theta3_posterior = accepted_params[, 1]
thetaBias_posterior = accepted_params[, 2]

# Plot joint posterior distribution
#plot(accepted_params[,1], accepted_params[,2], type = "p",
# xlab = "Theta 3", ylab = "Theta Bias",col="blue", main = "Joint Posterior Distribution", lwd=1)

ggplot(data.frame(theta3_posterior, thetaBias_posterior), aes(x = theta3_posterior, y = thetaBias_posterior)) +
  geom_bin2d(bins = 15, colour = "green") +
  labs(x = expression(theta[3]), y = expression(theta[Bias]))

#plot the marginal distribution
hist(theta3_posterior, col="orangered1", main = "Posterior distribution for Theta 3", xlab="Value", ylab="Frequency")
hist(thetaBias_posterior, col="limegreen", main = "Posterior distribution for Theta Bias", xlab="Value", ylab="Frequency")
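
# Optional: summarise each marginal posterior with its median and a 95% credible interval
print(quantile(theta3_posterior, c(0.025, 0.5, 0.975)))
print(quantile(thetaBias_posterior, c(0.025, 0.5, 0.975)))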
