import time
import dask.dataframe as dd
from dask.distributed import Client
from dask.diagnostics import ProgressBar
def main(csv_path="Trips_by_Distance.csv"):
    """Time two Dask aggregations over the trips CSV for each processor setting.

    For every entry in the hard-coded processor list a local ``Client`` is
    started, the ``Week`` and ``Population Staying at Home`` columns are read
    with ``dd.read_csv``, the number of unique weeks and the per-week mean of
    the population column are computed and printed, and the elapsed time is
    recorded under that processor count.

    Args:
        csv_path: Path (or glob pattern) of the CSV file to read. The
            original ``read_csv`` call passed no path at all, so the default
            here is only a placeholder -- TODO confirm the real location of
            the "Trips by Distance" file.

    Returns:
        Dict mapping each requested processor count to the elapsed seconds
        of its run. The same dict is printed before returning.
    """
    n_processors = [10, 20]
    n_processors_time = {}

    # NOTE(review): the worker count is capped at 4, so the 10- and
    # 20-processor runs both start 4 single-threaded workers and their
    # timings are not a real scaling comparison. Kept as in the original;
    # confirm whether the cap is an intended hardware limit.
    max_workers = 4

    for processor in n_processors:
        print(f"\n\nStarting computation with {processor} processors...\n\n")

        # The context manager closes the client (and the local cluster it
        # started) before the next iteration, so workers from earlier runs
        # do not pile up.
        with Client(n_workers=min(processor, max_workers), threads_per_worker=1):
            # perf_counter is monotonic, so the measurement is unaffected by
            # system clock adjustments.
            start = time.perf_counter()

            # Reading only necessary columns from the Trips by Distance data with Dask
            df_dask = dd.read_csv(
                csv_path,
                usecols=['Week', 'Population Staying at Home'],
                # Explicitly defining data types
                dtype={'Week': 'int64', 'Population Staying at Home': 'float64'},
            )

            # Task a: Count unique weeks
            unique_weeks = df_dask['Week'].nunique().compute()
            print(f"Unique weeks in the dataset: {unique_weeks}")

            # Task b: Calculate average population staying at home by week
            avg_population_staying_home = (
                df_dask.groupby('Week')['Population Staying at Home'].mean().compute()
            )
            print(
                "Average Population Staying At Home per week (first few entries):\n",
                avg_population_staying_home.head(),
            )

            # Stop the clock inside the ``with`` so cluster shutdown time is
            # not counted.
            dask_time = time.perf_counter() - start

        n_processors_time[processor] = dask_time
        print(f"\nTime with {processor} processors: {dask_time:.2f} seconds\n")

    print("\n\n", n_processors_time, "\n\n")
    return n_processors_time
if __name__ == '__main__':