Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
# Build the regression target from the per-distance trips file.
df = pd.read_csv('Cleaned_Trips_by_Distance.csv')
# NOTE(review): the frame is called week_32 but the filter keeps Week == 31 —
# confirm which of the two is intended.
in_selected_week = df['Week'] == 31
week_32 = df[in_selected_week]
# Positional (not label-based) row window inside the filtered frame;
# end_index is meant to be inclusive, hence the + 1 in the slice below.
start_index = 88
end_index = 94
trips_10_25 = week_32['Number of Trips 10-25']
y = trips_10_25.iloc[start_index:end_index + 1]
# Build the single regression feature from the full-data file.
df_full = pd.read_csv('Trips_Full Data.csv')
trips_25_100 = df_full['Trips 25-100 Miles'].values
# The estimators are given a 2-D array, so reshape to one column.
x = trips_25_100.reshape((-1, 1))
# Fit a regression model and report its in-sample R-squared
def fit_and_evaluate_model(model, x, y, model_name):
    """Fit ``model`` on ``(x, y)`` and print its R-squared on the same data.

    The score is computed on the very data the model was fitted to, so it
    describes goodness of fit rather than performance on unseen data.

    Args:
        model: Estimator object exposing ``fit(x, y)`` and ``predict(x)``.
        x: Feature data, passed unchanged to ``fit`` and ``predict``.
        y: Target values, passed to ``fit`` and to ``r2_score``.
        model_name: Label printed in front of the score.

    Returns:
        The R-squared value that was printed, so callers can compare models
        programmatically instead of parsing the printed text.
    """
    model.fit(x, y)
    y_pred = model.predict(x)
    r2 = r2_score(y, y_pred)
    print(f"{model_name} - R-squared: {r2:.2f}")
    return r2
# Baseline: ordinary least squares on the raw feature.
linear_model = LinearRegression()
fit_and_evaluate_model(
    model=linear_model,
    x=x,
    y=y,
    model_name="Linear Regression",
)

# Quadratic fit: expand the feature to degree-2 polynomial terms, then run
# a second, independent linear regression on the expanded matrix.
poly_features = PolynomialFeatures(degree=2)
x_poly = poly_features.fit_transform(x)
poly_model = LinearRegression()
fit_and_evaluate_model(
    model=poly_model,
    x=x_poly,
    y=y,
    model_name="Polynomial Regression (Degree=2)",
)

# Random forest of 100 trees on the raw feature, with random_state fixed at 42.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
fit_and_evaluate_model(
    model=rf_model,
    x=x,
    y=y,
    model_name="Random Forest Regression",
)
# In[2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from dask_ml.model_selection import train_test_split
# Rebuild the regression target from the per-distance trips file.
df = pd.read_csv('Cleaned_Trips_by_Distance.csv')
# NOTE(review): the frame is called week_32 but the filter keeps Week == 31 —
# confirm which of the two is intended.
in_selected_week = df['Week'] == 31
week_32 = df[in_selected_week]
# Positional (not label-based) row window inside the filtered frame;
# end_index is meant to be inclusive, hence the + 1 in the slice below.
start_index = 88
end_index = 94
trips_10_25 = week_32['Number of Trips 10-25']
y = trips_10_25.iloc[start_index:end_index + 1]
# Rebuild the single regression feature from the full-data file.
df_full = pd.read_csv('Trips_Full Data.csv')
trips_25_100 = df_full['Trips 25-100 Miles'].values
# The estimator is given a 2-D array, so reshape to one column.
x = trips_25_100.reshape((-1, 1))
# Hold out 20% of the samples for testing; shuffling is on and
# random_state is fixed at 42.
split_parts = train_test_split(
    x, y, test_size=0.2, random_state=42, shuffle=True
)
x_train, x_test, y_train, y_test = split_parts

# Fit the random forest on the training portion only
# (fit() returns the estimator itself, so the call can be chained).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    x_train, y_train
)

# In-sample performance: R-squared and mean squared error on the training set.
y_pred_train = rf_model.predict(x_train)
train_r2 = r2_score(y_train, y_pred_train)
train_mse = mean_squared_error(y_train, y_pred_train)
print(
    f"Random Forest Regression - R-squared on Training Set: {train_r2:.2f}",
    f"Random Forest Regression - MSE on Training Set: {train_mse:.2f}",
    sep="\n",
)

# Held-out performance: the same two metrics on the test set.
y_pred_test = rf_model.predict(x_test)
test_r2 = r2_score(y_test, y_pred_test)
test_mse = mean_squared_error(y_test, y_pred_test)
print(
    f"Random Forest Regression - R-squared on Test Set: {test_r2:.2f}",
    f"Random Forest Regression - MSE on Test Set: {test_mse:.2f}",
    sep="\n",
)
# In[ ]:
# In[ ]: