"""Plot trip counts over time and clean the trip datasets.

Reconstructed from the git patch "Create 1b.py" (commit c6df5a4b), whose
contents had been collapsed onto a single line.

The script reads ``Trips_by_Distance.csv``, saves one scatter plot per
distance band showing the days on which the band exceeded ten million
trips, then prints per-column null counts for the de-duplicated data.
"""

import sys

import pandas as pd

TRIPS_BY_DISTANCE_CSV = 'Trips_by_Distance.csv'

# Only rows strictly above this many trips are plotted.
TRIP_THRESHOLD = 10000000

# 'Distance' values above this are treated as invalid and blanked out.
MAX_DISTANCE = 1000

# (column, plot title, output file, marker colour) for each saved figure.
PLOTS = (
    ('Number of Trips 10-25', 'Trips of 10-25 Miles Over Time',
     'trips_10_25_miles_over_time.png', 'blue'),
    ('Number of Trips 50-100', 'Trips of 50-100 Miles Over Time',
     'trips_50_100_miles_over_time.png', 'red'),
)


def filter_above(df, column, threshold=TRIP_THRESHOLD):
    """Return the rows of *df* whose *column* is strictly above *threshold*."""
    return df[df[column] > threshold]


def plot_trips_over_time(df, column, title, output_path, color):
    """Save a scatter plot of ``df[column]`` against ``df['Date']`` as an image.

    Args:
        df: Frame with a datetime ``'Date'`` column and the *column* to plot.
        column: Name of the column holding the trip counts.
        title: Title drawn above the plot.
        output_path: File the figure is written to.
        color: Marker colour passed to matplotlib.
    """
    # Imported here so the cleaning helpers in this module stay usable
    # on machines without a plotting backend installed.
    import matplotlib.dates as mdates
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.scatter(df['Date'], df[column], color=color)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    fig.autofmt_xdate()  # Rotate the date labels so they do not overlap
    ax.set_title(title)
    ax.set_xlabel('Date')
    ax.set_ylabel('Number of Trips')
    fig.tight_layout()
    fig.savefig(output_path)
    plt.close(fig)  # Release the figure so successive plots do not overlap


def clean_trips_full_data(trips_full_data):
    """Return a de-duplicated copy with ``'Date'`` parsed as datetimes.

    Unparseable dates become ``NaT`` instead of raising, so they show up
    in the null counts. The input frame is not modified.
    """
    cleaned = trips_full_data.drop_duplicates()
    cleaned['Date'] = pd.to_datetime(cleaned['Date'], errors='coerce')
    return cleaned


def clean_trips_by_distance(trips_by_distance, max_distance=MAX_DISTANCE):
    """Return a de-duplicated copy with implausible distances blanked out.

    ``'Distance'`` values strictly above *max_distance* are replaced by NaN.
    The input frame is not modified.
    """
    cleaned = trips_by_distance.drop_duplicates()
    # NOTE(review): nothing else in this script reads a 'Distance' column,
    # so it is treated as optional here - confirm against the CSV schema.
    if 'Distance' in cleaned.columns:
        # Series.mask upcasts integer columns as needed, avoiding the
        # deprecated "assign NaN into an int column via .loc" path.
        cleaned['Distance'] = cleaned['Distance'].mask(
            cleaned['Distance'] > max_distance
        )
    return cleaned


def main(trips_by_distance_path=TRIPS_BY_DISTANCE_CSV,
         trips_full_data_path=None):
    """Produce the plots and print null counts for the cleaned datasets.

    Args:
        trips_by_distance_path: CSV holding the per-distance trip counts.
        trips_full_data_path: Optional CSV holding the full trip data. The
            original script never loaded this dataset, so its cleaning step
            is skipped when no path is supplied.
    """
    trips_by_distance = pd.read_csv(trips_by_distance_path)
    trips_by_distance['Date'] = pd.to_datetime(trips_by_distance['Date'])

    for column, title, output_path, color in PLOTS:
        busy_days = filter_above(trips_by_distance, column)
        plot_trips_over_time(busy_days, column, title, output_path, color)

    trips_by_distance_cleaned = clean_trips_by_distance(trips_by_distance)

    if trips_full_data_path is not None:
        trips_full_data_cleaned = clean_trips_full_data(
            pd.read_csv(trips_full_data_path)
        )
        print(trips_full_data_cleaned.isnull().sum())
    print(trips_by_distance_cleaned.isnull().sum())


if __name__ == '__main__':
    # Optional CLI arguments: [trips_by_distance.csv [trips_full_data.csv]]
    main(*sys.argv[1:3])