#!/usr/bin/env python
# coding: utf-8

# In[1]:

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Process the data
# Target (y): a fixed window of the 'Number of Trips 10-25' column, taken
# from the rows of the cleaned dataset whose Week value is 31.
df = pd.read_csv('Cleaned_Trips_by_Distance.csv')

# NOTE(review): the frame is called week_32 but the filter selects
# Week == 31; presumably the Week column is zero-based -- confirm.
week_32 = df[df['Week'] == 31]

# Positional (iloc) bounds inside the filtered frame. end_index is meant to
# be inclusive, hence the "+ 1" in the slice (at most 7 rows are selected).
start_index = 88
end_index = 94
y = week_32['Number of Trips 10-25'].iloc[start_index:end_index + 1]

# Feature (x): the whole 'Trips 25-100 Miles' column reshaped into a
# single-column 2-D array, one row per sample.
# NOTE(review): nothing here checks that x has as many rows as y; this
# assumes 'Trips_Full Data.csv' matches the length of the y window -- confirm.
df_full = pd.read_csv('Trips_Full Data.csv')
x = df_full['Trips 25-100 Miles'].values.reshape((-1, 1))

# Function to fit and evaluate regression models
def fit_and_evaluate_model(model, x, y, model_name):
    """Fit ``model`` on ``(x, y)`` and print its in-sample R-squared.

    The score is computed on the very data the model was fitted on, so it
    describes goodness of fit, not performance on unseen data.

    Args:
        model: Estimator exposing ``fit(x, y)`` and ``predict(x)``.
        x: Feature data, passed unchanged to ``fit`` and ``predict``.
        y: Target values, passed to ``fit`` and compared with the predictions.
        model_name: Label printed in front of the score.

    Returns:
        None. The score is only printed, formatted to two decimals.
    """
    model.fit(x, y)
    y_pred = model.predict(x)
    r2 = r2_score(y, y_pred)
    print(f"{model_name} - R-squared: {r2:.2f}")

# Each model below is fitted and scored on the same x and y through
# fit_and_evaluate_model, which prints one R-squared line per model.

# Linear model
linear_model = LinearRegression()
fit_and_evaluate_model(linear_model, x, y, "Linear Regression")

# Polynomial model
# The single feature is expanded into degree-2 polynomial terms first; an
# ordinary linear regression is then fitted on the expanded features.
poly_features = PolynomialFeatures(degree=2)
x_poly = poly_features.fit_transform(x)
poly_model = LinearRegression()
fit_and_evaluate_model(poly_model, x_poly, y, "Polynomial Regression (Degree=2)")

# Random forest model
# random_state is fixed so repeated runs build the same forest.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
fit_and_evaluate_model(rf_model, x, y, "Random Forest Regression")

# In[2]:

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from dask_ml.model_selection import train_test_split

# Process the data
# Target (y): a fixed window of the 'Number of Trips 10-25' column, taken
# from the rows of the cleaned dataset whose Week value is 31.
df = pd.read_csv('Cleaned_Trips_by_Distance.csv')

# NOTE(review): the frame is called week_32 but the filter selects
# Week == 31; presumably the Week column is zero-based -- confirm.
week_32 = df[df['Week'] == 31]

# Positional (iloc) bounds inside the filtered frame. end_index is meant to
# be inclusive, hence the "+ 1" in the slice (at most 7 rows are selected).
start_index = 88
end_index = 94
y = week_32['Number of Trips 10-25'].iloc[start_index:end_index + 1]

# Feature (x): the whole 'Trips 25-100 Miles' column reshaped into a
# single-column 2-D array, one row per sample.
# NOTE(review): nothing here checks that x has as many rows as y; this
# assumes 'Trips_Full Data.csv' matches the length of the y window -- confirm.
df_full = pd.read_csv('Trips_Full Data.csv')
x = df_full['Trips 25-100 Miles'].values.reshape((-1, 1))

# Splitting the data into training and testing sets (80-20)
# x is a NumPy array and y a pandas Series here; per the dask-ml
# documentation, non-dask inputs are forwarded to scikit-learn's
# train_test_split. random_state pins the shuffle so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, shuffle=True
)

# Train the Random Forest model (on the training portion only)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(x_train, y_train)

# Predictions on the training set
y_pred_train = rf_model.predict(x_train)

# Model performance on the training set (R-sq and MSE)
train_r2 = r2_score(y_train, y_pred_train)
train_mse = mean_squared_error(y_train, y_pred_train)
print(f"Random Forest Regression - R-squared on Training Set: {train_r2:.2f}")
print(f"Random Forest Regression - MSE on Training Set: {train_mse:.2f}")

# Predictions on the test set
y_pred_test = rf_model.predict(x_test)

# Model performance on the test set (R-sq and MSE)
# These rows were held out of fitting, so the two metrics below are the
# out-of-sample counterpart of the training figures printed above.
test_r2 = r2_score(y_test, y_pred_test)
test_mse = mean_squared_error(y_test, y_pred_test)
print(f"Random Forest Regression - R-squared on Test Set: {test_r2:.2f}")
print(f"Random Forest Regression - MSE on Test Set: {test_mse:.2f}")

