Skip to content
Permalink
8771a0edd8
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
24 lines (19 sloc) 1 KB
import dask.dataframe as dd
# Summarise the trips-by-distance CSV: report how many distinct
# 'Year of Date' values it contains and the mean of 'Trips 1-25 Miles'
# for each of those years.

# Load the CSV file lazily. With assume_missing=True dask treats integer
# columns that have no explicit dtype as possibly containing missing values
# and reads them as floats.
dataframe = dd.read_csv(
    "/Users/thecreator/Downloads/Trips_by_Distance.csv",
    assume_missing=True,
)

# Check columns in the DataFrame
columns = dataframe.columns
print("Columns in the DataFrame:", columns)

# Ensure 'Year of Date' column is present or handle its absence gracefully
if 'Year of Date' in columns:
    # Remove rows with any missing values. Note this drops a row when *any*
    # column is missing, not only the two columns used below.
    cleaned_dataframe = dataframe.dropna()

    # Compute the number of unique values in the 'Year of Date' column
    unique_year_count = cleaned_dataframe['Year of Date'].nunique().compute()
    print(f"Unique 'Year of Date' values: {unique_year_count}")

    # The per-year average needs a second column; check for it as well so a
    # renamed or absent column produces a message instead of a KeyError.
    if 'Trips 1-25 Miles' in columns:
        # Calculate the mean of 'Trips 1-25 Miles' for each year
        mean_trips_per_year = (
            cleaned_dataframe.groupby('Year of Date')['Trips 1-25 Miles']
            .mean()
            .compute()
        )
        print("Average 'Trips 1-25 Miles' per 'Year of Date':")
        print(mean_trips_per_year)
    else:
        print("The 'Trips 1-25 Miles' column is missing from the DataFrame or has a different name.")
else:
    print("The 'Year of Date' column is missing from the DataFrame or has a different name.")