Skip to content

Commit

Permalink
Create 1b.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ullaha18 committed Apr 12, 2024
1 parent e56ec66 commit c6df5a4
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions 1b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


# Read the trip data and run the 'Date' column through pd.to_datetime.
df = pd.read_csv('Trips_by_Distance.csv')
df['Date'] = pd.to_datetime(df['Date'])

# Keep only the rows whose trip count in each distance band exceeds 10,000,000.
df_10_25 = df.loc[df['Number of Trips 10-25'].gt(10_000_000)]
df_50_100 = df.loc[df['Number of Trips 50-100'].gt(10_000_000)]

def _save_trip_scatter(frame, column, color, title, filename):
    """Scatter-plot ``frame[column]`` against ``frame['Date']`` and save it as a PNG.

    Args:
        frame: DataFrame providing a 'Date' column and the ``column`` to plot.
        column: Name of the column drawn on the y-axis.
        color: Marker colour passed to ``Axes.scatter``.
        title: Title shown above the plot.
        filename: Path the figure is written to.
    """
    fig, ax = plt.subplots(figsize=(20, 10))
    ax.scatter(frame['Date'], frame[column], color=color)
    # One major tick per month, labelled as YYYY-MM.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    fig.autofmt_xdate()  # Rotate the date labels for better readability
    ax.set_title(title)
    ax.set_xlabel('Date')
    ax.set_ylabel('Number of Trips')
    fig.tight_layout()
    fig.savefig(filename)
    plt.close(fig)  # Close this figure so the next plot starts from a clean state


# Plot for 'Number of Trips 10-25'
_save_trip_scatter(
    df_10_25,
    'Number of Trips 10-25',
    'blue',
    'Trips of 10-25 Miles Over Time',
    'trips_10_25_miles_over_time.png',
)

# Plot for 'Number of Trips 50-100'
_save_trip_scatter(
    df_50_100,
    'Number of Trips 50-100',
    'red',
    'Trips of 50-100 Miles Over Time',
    'trips_50_100_miles_over_time.png',
)




# NOTE(review): `trips_full_data` and `trips_by_distance` are not defined in
# the visible part of this file; they must be loaded before this section can
# run -- TODO confirm where they are meant to come from.

# Drop exact duplicate rows. The first line previously read from
# `trips_full_data_cleaned` before it had been assigned (a NameError); it now
# mirrors the `trips_by_distance` line and starts from the raw frame. The
# explicit .copy() makes each cleaned frame independent of its source, so the
# assignments below modify only the cleaned copies.
trips_full_data_cleaned = trips_full_data.drop_duplicates().copy()
trips_by_distance_cleaned = trips_by_distance.drop_duplicates().copy()

# errors='coerce' turns unparseable dates into NaT instead of raising.
trips_full_data_cleaned['Date'] = pd.to_datetime(trips_full_data_cleaned['Date'], errors='coerce')

# Blank out 'Distance' values above 1000. Series.mask writes NaN wherever the
# condition holds, so numpy (not imported in the visible part of this file)
# is not required.
trips_by_distance_cleaned['Distance'] = trips_by_distance_cleaned['Distance'].mask(
    trips_by_distance_cleaned['Distance'] > 1000
)

# Report the number of missing values per column in each cleaned frame.
print(trips_full_data_cleaned.isnull().sum())
print(trips_by_distance_cleaned.isnull().sum())

0 comments on commit c6df5a4

Please sign in to comment.