"""Plot the average number of trips in each distance band.

Reads 'Trips_Full Data.csv' with Dask, takes the column-wise mean of the
ten trip-distance columns, and saves the result as a bar chart in
'people_travelling_vs_distance.png'.
"""

import dask.dataframe as dd
import matplotlib.pyplot as plt

# Lazily open the CSV; nothing is read until .compute() is called below.
df_full = dd.read_csv('Trips_Full Data.csv')

# Distance-band columns, listed from the shortest band to the longest.
distance_columns = [
    'Trips <1 Mile',
    'Trips 1-3 Miles',
    'Trips 3-5 Miles',
    'Trips 5-10 Miles',
    'Trips 10-25 Miles',
    'Trips 25-50 Miles',
    'Trips 50-100 Miles',
    'Trips 100-250 Miles',
    'Trips 250-500 Miles',
    'Trips 500+ Miles',
]

# One mean per selected column; .compute() materialises the lazy result,
# whose index holds the column names and whose values hold the means.
average_trips_each_distance = df_full[distance_columns].mean().compute()

# Draw on an explicit Figure/Axes pair instead of pyplot's implicit state.
fig, ax = plt.subplots(figsize=(15, 7))
ax.bar(
    average_trips_each_distance.index,
    average_trips_each_distance.values,
    color='cadetblue',
)
ax.set_title('People Travelling vs. Distance')
ax.set_xlabel('Distance Category')
ax.set_ylabel('Average Number of Trips')

# Tilt the category labels by 45 degrees.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

fig.tight_layout()

# Write the chart to disk, then release the figure.
fig.savefig('people_travelling_vs_distance.png')
plt.close(fig)