Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
# Read the Co2_Emission_Canada CSV file.
# The location defaults to the original author's local path; set the
# CO2_EMISSION_CSV environment variable to load the file from elsewhere
# without editing this script.
default_emission_csv <-
  "C:/Users/LENOVO/Desktop/Big_Data/RStudio/Co2_Emission_Canada.csv"
emission_csv <- Sys.getenv("CO2_EMISSION_CSV", unset = default_emission_csv)
if (!nzchar(emission_csv)) {
  # An empty override is treated the same as no override.
  emission_csv <- default_emission_csv
}
# Fail early with an explicit message instead of a low-level connection error.
if (!file.exists(emission_csv)) {
  stop("CO2 emission CSV not found: ", emission_csv, call. = FALSE)
}
emission <- read.csv(emission_csv)
# Preview the first rows of the data set
head(emission)
# Show the structure of the whole data set, then of the CO2 level column alone
str(emission)
str(emission$High.Medium.Low.Co2Emission.2.1.0.)
# Draw a histogram of `values` with per-bar counts printed above the bars.
# The y axis is always labelled "No of Vehicles"; colour, title and x-axis
# label are supplied by the caller. Returns the histogram object invisibly.
plot_vehicle_hist <- function(values, fill, title, x_label) {
  invisible(
    hist(
      values,
      col = fill,
      main = title,
      xlab = x_label,
      ylab = "No of Vehicles",
      labels = TRUE
    )
  )
}

# Fuel consumption in the city: summary statistics, then histogram
summary(emission$Fuel.Consumption.City..L.100.km.)
plot_vehicle_hist(
  emission$Fuel.Consumption.City..L.100.km.,
  fill = "yellow",
  title = "Histogram showing Fuel Consumption in City",
  x_label = "Fuel Consumption"
)

# Fuel consumption on the highway: summary statistics, then histogram
summary(emission$Fuel.Consumption.Hwy..L.100.km.)
plot_vehicle_hist(
  emission$Fuel.Consumption.Hwy..L.100.km.,
  fill = "green",
  title = "Histogram showing Fuel Consumption in Highway",
  x_label = "Fuel Consumption"
)

# Combined (city and highway) fuel consumption: summary, then histogram
summary(emission$Fuel.Consumption.Comb..L.100.km.)
plot_vehicle_hist(
  emission$Fuel.Consumption.Comb..L.100.km.,
  fill = "brown",
  title = "Histogram showing Fuel Consumption Combined",
  x_label = "Fuel Consumption"
)

# CO2 emissions: summary statistics, then histogram
summary(emission$CO2.Emissions.g.km.)
plot_vehicle_hist(
  emission$CO2.Emissions.g.km.,
  fill = "red",
  title = "Histogram showing CO2Emmission",
  x_label = "CO2 Emissions(g/km)"
)
# Summarise the CO2 level column
# (the column name suggests 2 = high, 1 = medium, 0 = low -- TODO confirm)
summary(emission[["High.Medium.Low.Co2Emission.2.1.0."]])
# Count how many vehicles fall into each CO2 level
co2_level_counts <- table(emission[["High.Medium.Low.Co2Emission.2.1.0."]])
# Bar plot of the counts, one colour per level, with the levels as legend
barplot(
  co2_level_counts,
  col = rainbow(3),
  main = "Using BarPlot to display Co2Emission fom the no of vehicles",
  xlab = "CO2 Level",
  ylab = "No Of Vehicles",
  legend.text = names(co2_level_counts)
)
# Turn the CO2 level column into a factor and inspect the result
co2_level_col <- "High.Medium.Low.Co2Emission.2.1.0."
emission[[co2_level_col]] <- factor(emission[[co2_level_col]])
emission[[co2_level_col]]
str(emission[[co2_level_col]])
summary(emission[[co2_level_col]])
# Drop the columns that are not needed for the rest of the analysis;
# names that are absent from the data frame are simply ignored
unused_columns <- c(
  "Model",
  "Vehicle.Class",
  "Transmission",
  "Make",
  "Fuel.Type"
)
emission <- emission[!(names(emission) %in% unused_columns)]
# Inspect the reduced data set and the factor column once more
str(emission)
emission[[co2_level_col]]
str(emission)
# Packages: `class` provides knn(), `caret` provides confusionMatrix()
library(class)
library(caret)

# The label is selected by name; every other remaining column is a feature.
label_col <- "High.Medium.Low.Co2Emission.2.1.0."
stopifnot(label_col %in% names(emission))
feature_cols <- setdiff(names(emission), label_col)

# Sequential 70% / 30% split derived from the actual number of rows
# (for 7385 rows this gives the first 5170 rows for training and the
# remaining 2215 rows for testing).
# NOTE(review): the split follows file order; if the CSV is sorted in any
# meaningful way a random split may be more representative -- confirm.
n_rows <- nrow(emission)
n_train <- ceiling(0.7 * n_rows)
if (n_train >= n_rows) {
  stop("Not enough rows to build a train/test split.", call. = FALSE)
}
train_rows <- seq_len(n_train)
test_rows <- setdiff(seq_len(n_rows), train_rows)
trainSet <- emission[train_rows, c(feature_cols, label_col)]
testSet <- emission[test_rows, c(feature_cols, label_col)]

# k is the rounded square root of the training-set size (72 for 5170 rows)
k_neighbours <- max(1, round(sqrt(n_train)))

# knn() breaks ties at random, so fix the seed to make the run reproducible.
# NOTE(review): the features are passed unscaled, so columns with large
# ranges dominate the distance; the feature set also appears to include the
# CO2 emission value the label is presumably derived from -- confirm both
# are intended.
set.seed(42)
predictions <- knn(
  train = trainSet[feature_cols],
  test = testSet[feature_cols],
  cl = trainSet[[label_col]],
  k = k_neighbours
)
predictions

# Confusion table: predicted class (rows) against actual class (columns)
confusion <- table(predicted = predictions, actual = testSet[[label_col]])
confusion

# Accuracy = correctly classified test rows / all test rows
Accuracy <- sum(diag(confusion)) / nrow(testSet)
Accuracy
summary(emission[[label_col]])

# Detailed confusion-matrix report from caret
confusionMatrix(data = predictions, reference = testSet[[label_col]])