diff --git a/CW1.ipynb b/CW1.ipynb
new file mode 100644
index 0000000..898cebc
--- /dev/null
+++ b/CW1.ipynb
@@ -0,0 +1,812 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Installing Required Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (2.2.2)\n",
+ "Requirement already satisfied: nltk in /opt/anaconda3/lib/python3.12/site-packages (3.8.1)\n",
+ "Requirement already satisfied: scikit-learn in /opt/anaconda3/lib/python3.12/site-packages (1.4.2)\n",
+ "Requirement already satisfied: numpy>=1.26.0 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (1.26.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2023.3)\n",
+ "Requirement already satisfied: click in /opt/anaconda3/lib/python3.12/site-packages (from nltk) (8.1.7)\n",
+ "Requirement already satisfied: joblib in /opt/anaconda3/lib/python3.12/site-packages (from nltk) (1.4.2)\n",
+ "Requirement already satisfied: regex>=2021.8.3 in /opt/anaconda3/lib/python3.12/site-packages (from nltk) (2023.10.3)\n",
+ "Requirement already satisfied: tqdm in /opt/anaconda3/lib/python3.12/site-packages (from nltk) (4.66.4)\n",
+ "Requirement already satisfied: scipy>=1.6.0 in /opt/anaconda3/lib/python3.12/site-packages (from scikit-learn) (1.14.1)\n",
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from scikit-learn) (2.2.0)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
+ "\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "# %pip (not !pip) installs into the environment of the running kernel; versions match the recorded run\n",
+ "%pip install pandas==2.2.2 nltk==3.8.1 scikit-learn==1.4.2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Loading the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "# The file is tab-separated and has no header row, so columns get integer labels for now\n",
+ "df = pd.read_csv(\"rct_data.txt\", sep=\"\\t\", header=None)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Analysing the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape of the dataset: (500068, 5)\n",
+ "--------\n",
+ "Index([0, 1, 2, 3, 4], dtype='int64')\n",
+ "--------\n",
+ "Missing values: 0 0\n",
+ "1 0\n",
+ "2 0\n",
+ "3 0\n",
+ "4 0\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Structural overview of the raw frame: dimensions, column labels, nulls per column\n",
+ "divider = \"--------\"\n",
+ "\n",
+ "print(\"Shape of the dataset:\", df.shape)\n",
+ "print(divider)\n",
+ "\n",
+ "print(df.columns)\n",
+ "print(divider)\n",
+ "\n",
+ "# isna() is the same check as isnull(); summing the boolean mask counts nulls in each column\n",
+ "missing_per_column = df.isna().sum()\n",
+ "print(\"Missing values: \", missing_per_column)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18439781 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Two patients subdued with a TASER® device: cas... | \n",
+ " In the United States, an increasing number of ... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18468833 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " A case of Takayasu arteritis causing subclavia... | \n",
+ " The American Heart Association website defines... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18481181 | \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " Pathophysiology of hypopituitarism in the sett... | \n",
+ " The complex pathophysiology of traumatic brain... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18728056 | \n",
+ " 1 | \n",
+ " 2011 | \n",
+ " The cardiovascular risk factor, soluble CD40 l... | \n",
+ " [BACKGROUND] Soluble CD40 ligand (sCD40L) is a... | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18790590 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Horner syndrome due to carotid dissection. | \n",
+ " [BACKGROUND] Internal carotid artery dissectio... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 \\\n",
+ "0 18439781 0 2011 Two patients subdued with a TASER® device: cas... \n",
+ "1 18468833 0 2011 A case of Takayasu arteritis causing subclavia... \n",
+ "2 18481181 0 2012 Pathophysiology of hypopituitarism in the sett... \n",
+ "3 18728056 1 2011 The cardiovascular risk factor, soluble CD40 l... \n",
+ "4 18790590 0 2011 Horner syndrome due to carotid dissection. \n",
+ "\n",
+ " 4 \n",
+ "0 In the United States, an increasing number of ... \n",
+ "1 The American Heart Association website defines... \n",
+ "2 The complex pathophysiology of traumatic brain... \n",
+ "3 [BACKGROUND] Soluble CD40 ligand (sCD40L) is a... \n",
+ "4 [BACKGROUND] Internal carotid artery dissectio... "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first 5 rows; the columns still carry the integer labels 0-4 assigned at load time\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Replace the integer column labels with descriptive names.\n",
+ "# NOTE(review): the names are inferred from the preview above (numeric ID, 0/1 label, year, title, abstract text) - confirm against the dataset documentation\n",
+ "df.columns = ['ID', 'label', 'year', 'title', 'abstract']\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " label | \n",
+ " year | \n",
+ " title | \n",
+ " abstract | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18439781 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Two patients subdued with a TASER® device: cas... | \n",
+ " In the United States, an increasing number of ... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18468833 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " A case of Takayasu arteritis causing subclavia... | \n",
+ " The American Heart Association website defines... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18481181 | \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " Pathophysiology of hypopituitarism in the sett... | \n",
+ " The complex pathophysiology of traumatic brain... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18728056 | \n",
+ " 1 | \n",
+ " 2011 | \n",
+ " The cardiovascular risk factor, soluble CD40 l... | \n",
+ " [BACKGROUND] Soluble CD40 ligand (sCD40L) is a... | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18790590 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Horner syndrome due to carotid dissection. | \n",
+ " [BACKGROUND] Internal carotid artery dissectio... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID label year title \\\n",
+ "0 18439781 0 2011 Two patients subdued with a TASER® device: cas... \n",
+ "1 18468833 0 2011 A case of Takayasu arteritis causing subclavia... \n",
+ "2 18481181 0 2012 Pathophysiology of hypopituitarism in the sett... \n",
+ "3 18728056 1 2011 The cardiovascular risk factor, soluble CD40 l... \n",
+ "4 18790590 0 2011 Horner syndrome due to carotid dissection. \n",
+ "\n",
+ " abstract \n",
+ "0 In the United States, an increasing number of ... \n",
+ "1 The American Heart Association website defines... \n",
+ "2 The complex pathophysiology of traumatic brain... \n",
+ "3 [BACKGROUND] Soluble CD40 ligand (sCD40L) is a... \n",
+ "4 [BACKGROUND] Internal carotid artery dissectio... "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Confirm that the new column names were applied by previewing the first five rows again\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Preprocessing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "import nltk\n",
+ "from nltk.corpus import stopwords"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package stopwords to /tmp/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "# Directory NLTK downloads into; created up front so the download cannot fail on a missing folder\n",
+ "nltk_data_path = \"/tmp/nltk_data\"\n",
+ "os.makedirs(nltk_data_path, exist_ok=True)\n",
+ "\n",
+ "# Register the directory only once, so re-running this cell does not append duplicate search-path entries\n",
+ "if nltk_data_path not in nltk.data.path:\n",
+ "    nltk.data.path.append(nltk_data_path)\n",
+ "\n",
+ "# Download the English stopword corpus into that directory (skipped by NLTK when already up to date)\n",
+ "nltk.download('stopwords', download_dir=nltk_data_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Lower-case every abstract in place; astype(str) guarantees string values before lower() is called\n",
+ "df['abstract'] = df['abstract'].astype(str).map(str.lower)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Matches any single character that is neither an ASCII letter nor whitespace;\n",
+ "# compiled once because clean_text is applied to every abstract\n",
+ "_NON_LETTER_PATTERN = re.compile(r'[^a-zA-Z\\s]')\n",
+ "\n",
+ "\n",
+ "def clean_text(text):\n",
+ "    \"\"\"Return `text` with every character other than ASCII letters and whitespace deleted.\n",
+ "\n",
+ "    NOTE(review): characters are deleted, not replaced by a space, so digits vanish\n",
+ "    ('cd40' -> 'cd') and hyphenated words are fused ('double-blind' -> 'doubleblind').\n",
+ "    \"\"\"\n",
+ "    return _NON_LETTER_PATTERN.sub('', text)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Store the cleaned text in a new column, cleaned_abstract, leaving the abstract column untouched\n",
+ "df['cleaned_abstract'] = df['abstract'].map(clean_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " label | \n",
+ " year | \n",
+ " title | \n",
+ " abstract | \n",
+ " cleaned_abstract | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18439781 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Two patients subdued with a TASER® device: cas... | \n",
+ " in the united states, an increasing number of ... | \n",
+ " in the united states an increasing number of l... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 18468833 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " A case of Takayasu arteritis causing subclavia... | \n",
+ " the american heart association website defines... | \n",
+ " the american heart association website defines... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18481181 | \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " Pathophysiology of hypopituitarism in the sett... | \n",
+ " the complex pathophysiology of traumatic brain... | \n",
+ " the complex pathophysiology of traumatic brain... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 18728056 | \n",
+ " 1 | \n",
+ " 2011 | \n",
+ " The cardiovascular risk factor, soluble CD40 l... | \n",
+ " [background] soluble cd40 ligand (scd40l) is a... | \n",
+ " background soluble cd ligand scdl is a powerfu... | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 18790590 | \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Horner syndrome due to carotid dissection. | \n",
+ " [background] internal carotid artery dissectio... | \n",
+ " background internal carotid artery dissection ... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ID label year title \\\n",
+ "0 18439781 0 2011 Two patients subdued with a TASER® device: cas... \n",
+ "1 18468833 0 2011 A case of Takayasu arteritis causing subclavia... \n",
+ "2 18481181 0 2012 Pathophysiology of hypopituitarism in the sett... \n",
+ "3 18728056 1 2011 The cardiovascular risk factor, soluble CD40 l... \n",
+ "4 18790590 0 2011 Horner syndrome due to carotid dissection. \n",
+ "\n",
+ " abstract \\\n",
+ "0 in the united states, an increasing number of ... \n",
+ "1 the american heart association website defines... \n",
+ "2 the complex pathophysiology of traumatic brain... \n",
+ "3 [background] soluble cd40 ligand (scd40l) is a... \n",
+ "4 [background] internal carotid artery dissectio... \n",
+ "\n",
+ " cleaned_abstract \n",
+ "0 in the united states an increasing number of l... \n",
+ "1 the american heart association website defines... \n",
+ "2 the complex pathophysiology of traumatic brain... \n",
+ "3 background soluble cd ligand scdl is a powerfu... \n",
+ "4 background internal carotid artery dissection ... "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Compare the lower-cased abstract with its cleaned counterpart\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Drop the columns that are not used for modelling (ID, year, title);\n",
+ "# keep the abstract, its cleaned version and the label\n",
+ "columns_to_keep = ['abstract', 'label', 'cleaned_abstract']\n",
+ "df = df[columns_to_keep]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " abstract | \n",
+ " label | \n",
+ " cleaned_abstract | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " in the united states, an increasing number of ... | \n",
+ " 0 | \n",
+ " in the united states an increasing number of l... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " the american heart association website defines... | \n",
+ " 0 | \n",
+ " the american heart association website defines... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " the complex pathophysiology of traumatic brain... | \n",
+ " 0 | \n",
+ " the complex pathophysiology of traumatic brain... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " [background] soluble cd40 ligand (scd40l) is a... | \n",
+ " 1 | \n",
+ " background soluble cd ligand scdl is a powerfu... | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " [background] internal carotid artery dissectio... | \n",
+ " 0 | \n",
+ " background internal carotid artery dissection ... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " abstract label \\\n",
+ "0 in the united states, an increasing number of ... 0 \n",
+ "1 the american heart association website defines... 0 \n",
+ "2 the complex pathophysiology of traumatic brain... 0 \n",
+ "3 [background] soluble cd40 ligand (scd40l) is a... 1 \n",
+ "4 [background] internal carotid artery dissectio... 0 \n",
+ "\n",
+ " cleaned_abstract \n",
+ "0 in the united states an increasing number of l... \n",
+ "1 the american heart association website defines... \n",
+ "2 the complex pathophysiology of traumatic brain... \n",
+ "3 background soluble cd ligand scdl is a powerfu... \n",
+ "4 background internal carotid artery dissection ... "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the frame after dropping the unused columns\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Removing Stop Words"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# English stopwords held in a set for fast membership tests\n",
+ "stop_words = set(stopwords.words('english'))\n",
+ "\n",
+ "\n",
+ "def remove_stopwords(text):\n",
+ "    \"\"\"Split `text` on whitespace, drop tokens found in `stop_words`, and re-join with single spaces.\"\"\"\n",
+ "    kept_tokens = (token for token in text.split() if token not in stop_words)\n",
+ "    return \" \".join(kept_tokens)\n",
+ "\n",
+ "\n",
+ "# Overwrite cleaned_abstract with its stopword-free version\n",
+ "df['cleaned_abstract'] = df['cleaned_abstract'].map(remove_stopwords)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Convert Text into Numerical Format Using TF-IDF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape of TF-IDF matrix: (500068, 5000)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+ "\n",
+ "# TF-IDF vectorizer whose vocabulary is capped at 5000 terms\n",
+ "vectorizer = TfidfVectorizer(max_features=5000)\n",
+ "\n",
+ "# Learn the vocabulary and IDF weights, then encode every abstract.\n",
+ "# NOTE(review): fitting happens on all rows, before the train/test split, so the IDF weights\n",
+ "# also see the held-out documents - consider fitting on the training split only.\n",
+ "X = vectorizer.fit_transform(df['cleaned_abstract'])\n",
+ "\n",
+ "# X is deliberately left as the SciPy sparse matrix returned by fit_transform.\n",
+ "# A dense copy would hold 500068 x 5000 float64 values (about 20 GB), and train_test_split,\n",
+ "# MultinomialNB and LogisticRegression all accept sparse input directly.\n",
+ "# Term names for the columns are available from vectorizer.get_feature_names_out().\n",
+ "\n",
+ "# Target variable (labels)\n",
+ "y = df['label']\n",
+ "\n",
+ "# Display shape of transformed data\n",
+ "print(\"Shape of TF-IDF matrix:\", X.shape)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training Data Shape: (400054, 5000)\n",
+ "Testing Data Shape: (100014, 5000)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "# Hold out 20% of the rows for evaluation; the fixed seed keeps the partition reproducible\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42\n",
+ ")\n",
+ "\n",
+ "# Report the size of each partition\n",
+ "print(\"Training Data Shape:\", X_train.shape)\n",
+ "print(\"Testing Data Shape:\", X_test.shape)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.naive_bayes import MultinomialNB\n",
+ "\n",
+ "# Create the multinomial Naive Bayes classifier with default settings and fit it on the training split\n",
+ "# (fit returns the estimator itself, so construction and training can be chained)\n",
+ "nb = MultinomialNB().fit(X_train, y_train)\n",
+ "\n",
+ "# Predicted labels for the held-out test split\n",
+ "y_pred_nb = nb.predict(X_test)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Model: Naive Bayes\n",
+ "Accuracy: 0.901603775471434\n",
+ "Precision: 0.7314131997588238\n",
+ "Recall: 0.7956609485368314\n",
+ "F1 Score: 0.7621855440902829\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
+ "\n",
+ "# Metrics reported for the Naive Bayes predictions, in display order\n",
+ "nb_metrics = [\n",
+ "    (\"Accuracy:\", accuracy_score),\n",
+ "    (\"Precision:\", precision_score),\n",
+ "    (\"Recall:\", recall_score),\n",
+ "    (\"F1 Score:\", f1_score),\n",
+ "]\n",
+ "\n",
+ "print(\"\\nModel: Naive Bayes\")\n",
+ "for metric_label, metric_fn in nb_metrics:\n",
+ "    print(metric_label, metric_fn(y_test, y_pred_nb))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Model: Logistic Regression\n",
+ "Accuracy: 0.9494970704101425\n",
+ "Precision: 0.9094084382103454\n",
+ "Recall: 0.827598385469223\n",
+ "F1 Score: 0.866576855007\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "\n",
+ "# Fit a logistic-regression classifier with scikit-learn's default settings on the training split\n",
+ "# (fit returns the estimator itself, so construction and training can be chained)\n",
+ "log_reg = LogisticRegression().fit(X_train, y_train)\n",
+ "\n",
+ "# Predicted labels for the held-out test split\n",
+ "y_pred_log = log_reg.predict(X_test)\n",
+ "\n",
+ "# Metrics reported for the Logistic Regression predictions, in display order\n",
+ "log_reg_metrics = [\n",
+ "    (\"Accuracy:\", accuracy_score),\n",
+ "    (\"Precision:\", precision_score),\n",
+ "    (\"Recall:\", recall_score),\n",
+ "    (\"F1 Score:\", f1_score),\n",
+ "]\n",
+ "\n",
+ "print(\"\\nModel: Logistic Regression\")\n",
+ "for metric_label, metric_fn in log_reg_metrics:\n",
+ "    print(metric_label, metric_fn(y_test, y_pred_log))\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}