Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import dask.dataframe as d\n",
"import matplotlib.pyplot as plt\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"# Lazily open the trips CSV with dask; assume_missing=True tells dask to treat integer columns\n",
"# without an explicit dtype as floats so that missing values can be represented.\n",
"csv_path = \"trips_by_distance.csv\"\n",
"data = d.read_csv(csv_path, assume_missing=True)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "Dask diagnostics requirements are not installed.\n\nPlease either conda or pip install as follows:\n\n conda install dask # either conda install\n python -m pip install \"dask[diagnostics]\" --upgrade # or python -m pip install",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\dask\\widgets\\__init__.py:4\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdask\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mwidgets\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mwidgets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 5\u001b[0m FILTERS,\n\u001b[0;32m 6\u001b[0m TEMPLATE_PATHS,\n\u001b[0;32m 7\u001b[0m get_environment,\n\u001b[0;32m 8\u001b[0m get_template,\n\u001b[0;32m 9\u001b[0m )\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\dask\\widgets\\widgets.py:7\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpath\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mjinja2\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Environment, FileSystemLoader, Template\n\u001b[0;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mjinja2\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexceptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TemplateNotFound\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'jinja2'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\IPython\\core\\formatters.py:344\u001b[0m, in \u001b[0;36mBaseFormatter.__call__\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 342\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n\u001b[0;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 344\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 345\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 346\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\dask\\dataframe\\core.py:6601\u001b[0m, in \u001b[0;36mDataFrame._repr_html_\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 6597\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_repr_html_\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m 6598\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_repr_data()\u001b[38;5;241m.\u001b[39mto_html(\n\u001b[0;32m 6599\u001b[0m max_rows\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, show_dimensions\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, notebook\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 6600\u001b[0m )\n\u001b[1;32m-> 6601\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_template\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdataframe.html.j2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mrender(\n\u001b[0;32m 6602\u001b[0m data\u001b[38;5;241m=\u001b[39mdata,\n\u001b[0;32m 6603\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_name,\n\u001b[0;32m 6604\u001b[0m layers\u001b[38;5;241m=\u001b[39mmaybe_pluralize(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdask\u001b[38;5;241m.\u001b[39mlayers), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgraph layer\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[0;32m 6605\u001b[0m )\n",
"File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\dask\\widgets\\__init__.py:26\u001b[0m, in \u001b[0;36mget_template\u001b[1;34m(name)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_template\u001b[39m(name: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexception\u001b[39;00m\n",
"\u001b[1;31mImportError\u001b[0m: Dask diagnostics requirements are not installed.\n\nPlease either conda or pip install as follows:\n\n conda install dask # either conda install\n python -m pip install \"dask[diagnostics]\" --upgrade # or python -m pip install"
]
},
{
"data": {
"text/plain": [
"Dask DataFrame Structure:\n",
" Level Date State FIPS State Postal Code County FIPS County Name Population Staying at Home Population Not Staying at Home Number of Trips Number of Trips <1 Number of Trips 1-3 Number of Trips 3-5 Number of Trips 5-10 Number of Trips 10-25 Number of Trips 25-50 Number of Trips 50-100 Number of Trips 100-250 Number of Trips 250-500 Number of Trips >=500 Row ID Week Month\n",
"npartitions=2 \n",
" object object float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 float64 object float64 float64\n",
" ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
" ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"Dask Name: dropna, 2 graph layers"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# dropna() returns a new lazy frame and leaves `data` untouched, so bind the result to a name\n",
"# instead of discarding it. Cells that keep reading `data` still see the rows with missing values.\n",
"# NOTE(review): with default arguments a row is dropped when ANY column is missing - confirm that is intended.\n",
"data_clean = data.dropna()\n",
"data_clean"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Week\n",
"0.0 1.204752e+05\n",
"1.0 1.511653e+05\n",
"2.0 1.495038e+05\n",
"3.0 1.468154e+05\n",
"4.0 1.467504e+05\n",
"5.0 1.449985e+05\n",
"6.0 1.435468e+05\n",
"7.0 1.469490e+05\n",
"8.0 1.391244e+05\n",
"9.0 1.389168e+05\n",
"10.0 1.393175e+05\n",
"11.0 1.542455e+05\n",
"12.0 1.569134e+05\n",
"13.0 1.590891e+05\n",
"14.0 1.591531e+05\n",
"15.0 1.607076e+05\n",
"16.0 1.552874e+05\n",
"17.0 1.548010e+05\n",
"18.0 1.546350e+05\n",
"19.0 1.489824e+05\n",
"20.0 1.422526e+05\n",
"21.0 1.498488e+05\n",
"22.0 1.483272e+05\n",
"23.0 8.597876e+04\n",
"24.0 8.686647e+04\n",
"25.0 1.052256e+05\n",
"26.0 1.111628e+05\n",
"27.0 1.083400e+05\n",
"28.0 1.083705e+05\n",
"29.0 1.128345e+05\n",
"30.0 1.117844e+05\n",
"31.0 1.092694e+05\n",
"32.0 1.036682e+05\n",
"33.0 1.029203e+05\n",
"34.0 1.071395e+05\n",
"35.0 1.127195e+05\n",
"36.0 1.123887e+05\n",
"37.0 1.118105e+05\n",
"38.0 1.114665e+05\n",
"39.0 1.140702e+05\n",
"40.0 1.167576e+05\n",
"41.0 1.179320e+05\n",
"42.0 1.183920e+05\n",
"43.0 1.411245e+05\n",
"44.0 2.887722e+06\n",
"45.0 3.086734e+06\n",
"46.0 3.088759e+06\n",
"47.0 3.119602e+06\n",
"48.0 3.009276e+06\n",
"49.0 3.042222e+06\n",
"50.0 3.075785e+06\n",
"51.0 3.264755e+06\n",
"52.0 3.259163e+06\n",
"Name: Population Staying at Home, dtype: float64 People staying\n"
]
},
{
"ename": "AttributeError",
"evalue": "'Series' object has no attribute 'iloc'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[92], line 30\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m column_name \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m6\u001b[39m:]: \u001b[38;5;66;03m# Adjusted column range to exclude columns until the 6th index\u001b[39;00m\n\u001b[0;32m 28\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m column_name \u001b[38;5;129;01min\u001b[39;00m average_distance_per_trip:\n\u001b[0;32m 29\u001b[0m \u001b[38;5;66;03m# Perform multiplication for the first item in each column and add to the total distance traveled\u001b[39;00m\n\u001b[1;32m---> 30\u001b[0m total_distance_traveled \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\u001b[43mdata\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcolumn_name\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miloc\u001b[49m[row_count] \u001b[38;5;241m*\u001b[39m average_distance_per_trip[column_name])\u001b[38;5;241m.\u001b[39mcompute()\n\u001b[0;32m 31\u001b[0m total_distance_traveled \u001b[38;5;241m=\u001b[39m total_distance_traveled\u001b[38;5;241m/\u001b[39mrows\n\u001b[0;32m 32\u001b[0m total_distance_traveled_list\u001b[38;5;241m.\u001b[39mappend(total_distance_traveled)\n",
"\u001b[1;31mAttributeError\u001b[0m: 'Series' object has no attribute 'iloc'"
]
}
],
"source": [
"# Number of distinct weeks and the weekly mean of 'Population Staying at Home'.\n",
"# Each .compute() triggers one pass over the CSV, so the results are kept in variables.\n",
"week_count = data['Week'].nunique().compute()\n",
"print(week_count, \"distinct weeks\")\n",
"people_staying = data.groupby(by = 'Week')['Population Staying at Home'].mean().compute()\n",
"print(people_staying,\" People staying\")\n",
"\n",
"\n",
"# Representative miles per trip for each distance-bucket column.\n",
"# Keys must match the CSV column names exactly; only the non-overlapping buckets\n",
"# are listed so that no trip is counted twice.\n",
"average_distance_per_trip = {\n",
"    \"Number of Trips <1\": 1,\n",
"    \"Number of Trips 1-3\": 2,\n",
"    \"Number of Trips 3-5\": 4,\n",
"    \"Number of Trips 5-10\": 7.5,\n",
"    \"Number of Trips 10-25\": 17.5,\n",
"    \"Number of Trips 25-50\": 37.5,\n",
"    \"Number of Trips 50-100\": 75,\n",
"    \"Number of Trips 100-250\": 175,\n",
"    \"Number of Trips 250-500\": 375,\n",
"    \"Number of Trips >=500\": 501,\n",
"}\n",
"\n",
"# Only use the bucket columns that are actually present in the loaded frame.\n",
"bucket_columns = [name for name in data.columns if name in average_distance_per_trip]\n",
"if not bucket_columns:\n",
"    raise ValueError(\"No distance-bucket columns found in the data\")\n",
"\n",
"# Sum every bucket column in a single lazy pass instead of calling .compute()\n",
"# once per row and per column; the result is a small pandas Series indexed by column name.\n",
"trips_per_bucket = data[bucket_columns].sum().compute()\n",
"\n",
"# Estimated miles per bucket = trips in the bucket * representative miles per trip.\n",
"# No division by 'Number of Trips' here: that would give miles per trip, not the total reported below.\n",
"total_distance_traveled_list = [\n",
"    trips_per_bucket[name] * average_distance_per_trip[name] for name in bucket_columns\n",
"]\n",
"total_distance_traveled = sum(total_distance_traveled_list)\n",
"total = total_distance_traveled\n",
"\n",
"# NOTE(review): rows of every 'Level' are summed together; if the CSV mixes aggregate and\n",
"# detail rows this over-counts - confirm and filter on 'Level' first if needed.\n",
"print(\"Total distance traveled by people who don't stay at home:\", total, \"miles\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose bucket count exceeds ten million trips (no .compute() is called here).\n",
"min_trips = 10_000_000\n",
"trips_10_25, trips_50_100 = (\n",
"    data[data[bucket] > min_trips]\n",
"    for bucket in ('Number of Trips 10-25', 'Number of Trips 50-100')\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}