"""Time two Dask aggregations over ``Trips_By_Distance.csv``.

For each requested processor count the script starts a ``dask.distributed``
client, counts the unique values of the ``Week`` column, computes the mean of
``Population Staying at Home`` per week, and records the elapsed time.
"""
import time
import dask.dataframe as dd
from dask.distributed import Client
from dask.diagnostics import ProgressBar
def main(n_processors=(10, 20), max_workers=4, csv_path='Trips_By_Distance.csv'):
    """Time the weekly stay-at-home aggregations for several processor counts.

    For every entry in ``n_processors`` a ``dask.distributed.Client`` is
    started, the CSV is read lazily, two aggregations are computed, and the
    elapsed time is stored and printed. A summary dict mapping each requested
    processor count to its elapsed seconds is printed at the end.

    Args:
        n_processors: Iterable of requested processor counts to benchmark.
        max_workers: Upper bound on the number of workers actually started
            per run. ``None`` disables the cap.
        csv_path: Path of the "Trips by Distance" CSV file; it must contain
            the ``Week`` and ``Population Staying at Home`` columns.
    """
    n_processors_time = {}

    for processor in n_processors:
        print(f"\n\nStarting computation with {processor} processors...\n\n")

        # NOTE(review): with the default cap of 4, the 10- and 20-processor
        # runs both use 4 workers, so their timings are not expected to
        # differ. Pass max_workers=None to benchmark the requested counts.
        n_workers = processor if max_workers is None else min(processor, max_workers)
        if n_workers != processor:
            print(f"Requested {processor} processors; capped at {n_workers} workers.")

        # The context manager closes the client even if a read or compute
        # step raises, which the previous explicit close() call did not.
        with Client(n_workers=n_workers, threads_per_worker=1):
            # perf_counter is monotonic, so the measurement is unaffected by
            # wall-clock adjustments. Client start-up is excluded on purpose.
            start = time.perf_counter()

            # Reading only necessary columns from the Trips by Distance data with Dask
            df_dask = dd.read_csv(
                csv_path,
                usecols=['Week', 'Population Staying at Home'],
                # Explicitly defining data types
                dtype={'Week': 'int64', 'Population Staying at Home': 'float64'},
            )

            # Task a: Count unique weeks
            unique_weeks = df_dask['Week'].nunique().compute()
            print(f"Unique weeks in the dataset: {unique_weeks}")

            # Task b: Calculate average population staying at home by week
            avg_population_staying_home = (
                df_dask.groupby('Week')['Population Staying at Home'].mean().compute()
            )
            print(
                "Average Population Staying At Home per week (first few entries):\n",
                avg_population_staying_home.head(),
            )

            dask_time = time.perf_counter() - start

        n_processors_time[processor] = dask_time
        print(f"\nTime with {processor} processors: {dask_time:.2f} seconds\n")

    print("\n\n", n_processors_time, "\n\n")
# Run the benchmark only when this file is executed as a script, not when it
# is imported. NOTE(review): process-based Dask workers may re-import this
# module on some platforms, in which case this guard is required — confirm.
if __name__ == '__main__':
    main()