import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Referenced Aula and the library documentation
df_full = pd.read_csv('Trips_Full_Data.csv') # Renamed these DataFrames because I kept confusing the two datasets.
df_distance = pd.read_csv('Trips_by_Distance.csv')
df_full = df_full.dropna() # Drop rows with missing values using pandas, slightly different from how I did it in 1a
df_full = df_full.fillna(0) # Redundant after dropna(), but kept as a safeguard so any remaining NaNs become 0
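# Quick check that the cleaning worked: the total NaN count should be 0 after the two steps above.
print("Remaining NaNs in df_full:", df_full.isna().sum().sum())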
df_full['Week'] = df_full['Week of Date'].str.extract(r'(\d+)', expand=False).astype(int) # Pull the week number out of 'Week of Date' (https://www.geeksforgeeks.org/python-pandas-series-str-extract/)
df_full_week32 = df_full[df_full['Week'] == 32] # Filter down to week 32 only
merged_df = pd.merge(df_full_week32, df_distance, on='Level', how='inner') # merging the datasets based on level, all are 'national'
# https://www.w3schools.com/python/pandas/ref_df_merge.asp
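# Sanity check on the merge: since every row shares Level == 'national', an inner merge here
# behaves like a cross join, so it is worth confirming the shape and the Level values look right.
print("Merged shape:", merged_df.shape)
print("Levels in merged data:", merged_df['Level'].unique())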
# People vs Distance:
X = merged_df[['Trips 25-100 Miles']]
y = merged_df['Number of Trips 10-25']
#Linear model
model = LinearRegression()
model.fit(X, y)
print("Linear model:")
r_sq = model.score(X, y)
print("Coefficient of determination (R^2):", r_sq)
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)
y_pred = model.predict(X)
print("Predicted response:\n", y_pred)
# The R^2 value is 0, so the relationship does not look linear. Trying a polynomial fit next.
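# Quick check to back this up: if X barely varies, or barely correlates with y, any straight-line
# fit will come out flat regardless of the model. This assumes both columns are numeric after the merge.
print("Unique values in X:", merged_df['Trips 25-100 Miles'].nunique())
print("Correlation between X and y:", merged_df['Trips 25-100 Miles'].corr(merged_df['Number of Trips 10-25']))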
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
poly_model = LinearRegression()
poly_model.fit(X_poly, y)
print("Polynomial model:")
y_pred = poly_model.predict(X_poly)
print("Predicted response:\n", y_pred)
print("Intercept:", poly_model.intercept_)
print("Coefficients:", poly_model.coef_)
r_sq = poly_model.score(X_poly, y)
print("Coefficient of determination (R^2):", r_sq)
# Linear model:
# ('Coefficient of determination (R^2):', 0.0)
# ('Intercept:', 179005342.22197562)
# ('Coefficients:', array([-4.52300271e-16]))
# ('Predicted response:\n', array([1.79005342e+08, 1.79005342e+08, 1.79005342e+08, ...,
# 1.79005342e+08, 1.79005342e+08, 1.79005342e+08]))
# Polynomial model:
# ('Predicted response:\n', array([1.79005342e+08, 1.79005342e+08, 1.79005342e+08, ...,
# 1.79005342e+08, 1.79005342e+08, 1.79005342e+08]))
# ('Intercept:', 179005345.47188395)
# ('Coefficients:', array([ 0.00000000e+00, -7.46003515e-08, 4.26695402e-16]))
# ('Coefficient of determination (R^2):', -2.220446049250313e-16)
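# Both fits predict essentially the same constant for every row, which is what ordinary least
# squares falls back to (roughly the mean of y) when the feature carries no usable signal.
# Comparing against the mean of y makes that easier to see (y is still in scope from above).
print("Mean of y:", y.mean())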
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
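# The degree-2 expansion turns the single feature into [1, x, x^2]; printing the generated
# column names makes that explicit (assumes scikit-learn >= 1.0, which has get_feature_names_out).
print("Polynomial feature names:", poly.get_feature_names_out())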
# Initialize polynomial regression model
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
y_pred = poly_model.predict(X_test_poly)
# Evaluate the model
r_squared = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print("Trained model R value (R^2):", r_squared)
print("Mean squared error:", mse)