# 7141CEM_Coursework/KNN_Model_CO2Emission.R
# KNN model for vehicle CO2 emission levels (Co2_Emission_Canada data set).
# Load the Co2_Emission_Canada data set.
# The location defaults to the path this script was originally written for;
# set the CO2_EMISSION_CSV environment variable to read the file from
# somewhere else without editing the script.
emission_csv <- Sys.getenv("CO2_EMISSION_CSV", unset = "")
if (!nzchar(emission_csv)) {
  emission_csv <- file.path(
    "C:/Users/LENOVO/Desktop/Big_Data/RStudio",
    "Co2_Emission_Canada.csv"
  )
}
# Fail early with a readable message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(emission_csv)) {
  stop("CO2 emission data file not found: ", emission_csv, call. = FALSE)
}
emission <- read.csv(emission_csv)
# Preview the first rows of the data set.
head(emission)
# Show every column together with its data type.
str(emission)
# Show the type of the CO2 level column (the column later used as the
# class label) on its own.
str(emission$High.Medium.Low.Co2Emission.2.1.0.)
# Draw a histogram of one column with per-bar counts printed on the bars.
#   values  - vector passed to hist() as the data to bin
#   fill    - bar colour
#   title   - plot title
#   x_label - x-axis label
# The y axis is always labelled "No of Vehicles". The value of hist() is
# returned invisibly so calling the helper prints nothing to the console.
plot_vehicle_hist <- function(values, fill, title, x_label) {
  invisible(
    hist(
      values,
      col = fill,
      main = title,
      xlab = x_label,
      ylab = "No of Vehicles",
      labels = TRUE
    )
  )
}

# Each section prints the summary statistics of a column (Min, 1st Qu.,
# Median, Mean, 3rd Qu., Max) and then plots its distribution.

# Fuel consumption in the city.
summary(emission$Fuel.Consumption.City..L.100.km.)
plot_vehicle_hist(
  emission$Fuel.Consumption.City..L.100.km.,
  fill = "yellow",
  title = "Histogram showing Fuel Consumption in City",
  x_label = "Fuel Consumption"
)

# Fuel consumption on the highway.
summary(emission$Fuel.Consumption.Hwy..L.100.km.)
plot_vehicle_hist(
  emission$Fuel.Consumption.Hwy..L.100.km.,
  fill = "green",
  title = "Histogram showing Fuel Consumption in Highway",
  x_label = "Fuel Consumption"
)

# Combined city and highway fuel consumption.
summary(emission$Fuel.Consumption.Comb..L.100.km.)
plot_vehicle_hist(
  emission$Fuel.Consumption.Comb..L.100.km.,
  fill = "brown",
  title = "Histogram showing Fuel Consumption Combined",
  x_label = "Fuel Consumption"
)

# CO2 emissions. The plot title previously read "CO2Emmission".
summary(emission$CO2.Emissions.g.km.)
plot_vehicle_hist(
  emission$CO2.Emissions.g.km.,
  fill = "red",
  title = "Histogram showing CO2 Emission",
  x_label = "CO2 Emissions(g/km)"
)
# Summarise the CO2 level column before plotting it.
summary(emission$High.Medium.Low.Co2Emission.2.1.0.)
# Count the vehicles in each CO2 level.
a <- table(emission$High.Medium.Low.Co2Emission.2.1.0.)
# Bar plot of the counts, one bar and one colour per level.
barplot(
  a,
  main = "Using BarPlot to display Co2Emission from the no of vehicles",
  ylab = "No Of Vehicles",
  xlab = "CO2 Level",
  # One colour per level actually present (3 for a three-level column).
  col = rainbow(length(a)),
  # Spelled out in full: `legend =` only worked through partial matching
  # of this argument name.
  legend.text = rownames(a)
)
# Convert the CO2 level column to a factor so that it is treated as a
# categorical class label.
emission[["High.Medium.Low.Co2Emission.2.1.0."]] <- factor(
  emission[["High.Medium.Low.Co2Emission.2.1.0."]]
)
# Print the converted column, then its structure and its per-level counts.
emission[["High.Medium.Low.Co2Emission.2.1.0."]]
str(emission[["High.Medium.Low.Co2Emission.2.1.0."]])
summary(emission[["High.Medium.Low.Co2Emission.2.1.0."]])
# Discard the columns that are not required, keeping the remaining columns
# in their existing order.
emission <- emission[
  ,
  !(names(emission) %in%
      c("Model", "Vehicle.Class", "Transmission", "Make", "Fuel.Type")),
  drop = FALSE
]
# Re-inspect the reduced data set and the label column.
str(emission)
emission[["High.Medium.Low.Co2Emission.2.1.0."]]
str(emission)
# Split the data into a training set (first 70% of the rows) and a testing
# set (remaining 30%).
# The boundary is derived from the actual row count rather than the literals
# 5170 / 7385: indexing past the last row of a shorter data set would add
# all-NA rows, and a longer data set would silently lose rows. For a
# 7385-row data set this reproduces the original 1:5170 / 5171:7385 split.
# NOTE(review): rows are taken in file order, not shuffled; if the CSV is
# sorted in any meaningful way the two sets may not be comparable - confirm.
total_rows <- nrow(emission)
stopifnot(total_rows >= 2)
# Always leave at least one row for the testing set.
train_rows <- seq_len(min(ceiling(0.7 * total_rows), total_rows - 1))
trainSet <- emission[train_rows, 1:8]
testSet <- emission[-train_rows, 1:8]
# Load the packages used below: class provides knn(), caret provides
# confusionMatrix().
library(class)
library(caret)
# The class label is addressed by name; every other column of the training
# set is used as a predictor (previously selected with the index -8).
label_col <- "High.Medium.Low.Co2Emission.2.1.0."
feature_cols <- setdiff(names(trainSet), label_col)
# k is the rounded square root of the training-set size (72 for 5170 rows).
k_neighbours <- max(1, round(sqrt(nrow(trainSet))))
# knn() breaks voting ties at random, so fix the seed to make the
# predictions, and therefore the reported accuracy, reproducible.
set.seed(7141)
# NOTE(review): the predictors are not rescaled and knn() uses Euclidean
# distance, so columns with large values dominate the neighbour search -
# consider scale() if that is not intended.
predictions <- knn(
  train = trainSet[, feature_cols, drop = FALSE],
  test = testSet[, feature_cols, drop = FALSE],
  cl = trainSet[[label_col]],
  k = k_neighbours
)
predictions
# Cross-tabulate predicted against actual labels of the testing set.
confusion <- table(predictions, testSet[[label_col]])
confusion
# Accuracy is the share of testing rows on the diagonal of the table,
# i.e. rows whose predicted label equals the actual label.
Accuracy <- sum(diag(confusion)) / nrow(testSet)
Accuracy
# Per-level counts of the label over the whole data set, for comparison.
summary(emission[[label_col]])
# Full caret report for the same predictions.
confusionMatrix(data = predictions, reference = testSet[[label_col]])