Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
set.seed(123)
#create the theta bias
constant = rep(1, times = length(y))
#split X into X1 and X2
X1=Dataframe[,1]
X2=Dataframe[,2]
#create the models
model1=cbind((X1^3), (X2^5), constant)
model2=cbind((X1^4), (X2^2), constant)
model3=cbind((X1^3), X2, X1, constant)
model4=cbind(X1, (X1^2),(X1^3), (X2^3), constant)
model5=cbind((X1^3), (X1^4), X2, constant)
for (i in 1:5) {
#get the model for every index: (model + i)
var_name=paste0("model", i)
my_model=get(var_name)
#estimate model parameters using least squares (task 2.1)
thetahat=solve(t(my_model) %*% my_model) %*% t(my_model) %*% y
#print(thetahat) #print thetahat to include in the report
#compute the models prediction
y_pred=my_model %*% thetahat
#compute model residual (error) and calculate the sum of squared errors (task 2.2)
residuals = y - y_pred
rss = sum(residuals^2)
print(paste0("--------------------- Model ", i," -----------------------"))
print(paste0("RSS for model ",i, ": ", rss))
#compute the log-likelihood function (task 2.3)
n = length(y)
sigma_squared = rss/(n-1)
loglikelihood = -(n/2) * log(2*pi) - (n/2) * log(sigma_squared) - (1/(2*sigma_squared)) * rss
print(paste0("log likelihood for model ",i, ": ", loglikelihood))
#define k for AIC and BIC this is the number of estimated parameters in each model
#K is 4 in model 1 and 2, K is 5 in model 3 and 5, K is 6 in model 4
if (i == 1 || i == 2) {
k= 4
} else if (i == 4){
k= 6
} else {
k= 5
}
#compute AIC (task 2.4)
AIC = 2 * k - 2 * loglikelihood
print(paste0("AIC for model ",i, ": ", AIC, " info: K is equal to: ", k))
#compute BIC
BIC = log(n) * k - 2 * loglikelihood
print(paste0("BIC for model ",i, ": ", BIC))
#Check if the residuals are close to normal/Gaussian (task 2.5)
qqnorm(residuals, main=paste("Q-Q Plot for Model", i))
qqline(residuals)
hist(residuals, col="limegreen", main=paste("Distribution of the residuals for model", i), xlab="Model prediction errors", ylab="Frequency")
print(shapiro.test(residuals))
}