Skip to content
Permalink
da8a5bac54
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
216 lines (216 sloc) 5.55 KB
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#importing libraries\n",
"import pandas as pd\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#read the raw housing records from the csv file into a DataFrame\n",
"file_data = pd.read_csv(filepath_or_buffer=\"housing_price_dataset.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#preview the top rows of the dataset\n",
"#the bare last expression lets the notebook render the DataFrame itself, so no print() is needed\n",
"file_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(50000, 7)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#report the dataset dimensions as a (row count, column count) tuple\n",
"len(file_data.index), len(file_data.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#print a summary of the dataset (columns, dtypes, non-null counts)\n",
"#info is a method and must be called; the bare attribute only shows the bound-method repr\n",
"file_data.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SquareFeet False\n",
"Bedrooms False\n",
"Bathrooms False\n",
"Neighborhood False\n",
"YearBuilt False\n",
"Price False\n",
"Age False\n",
"dtype: bool"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#flag, for each column, whether it holds at least one missing value\n",
"file_data.isna().any(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SquareFeet 2000\n",
"Bedrooms 4\n",
"Bathrooms 3\n",
"Neighborhood 3\n",
"YearBuilt 72\n",
"Price 50000\n",
"Age 72\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#count the distinct non-null values found in each column\n",
"file_data.nunique(axis=0, dropna=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#derive an 'Age' column (in years) from 'YearBuilt'\n",
"#NOTE: uses the wall-clock year, so 'Age' and the exported csv change when this is re-run in a later year\n",
"current_year = datetime.now().year\n",
"file_data['Age'] = current_year - file_data['YearBuilt']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#write the processed DataFrame to a new csv file, omitting the index column\n",
"processed_file_path = \"processed_data.csv\"\n",
"file_data.to_csv(path_or_buf=processed_file_path, index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}