diff --git a/Scripts/TransformerExample.ipynb b/Scripts/TransformerExample.ipynb
deleted file mode 100644
index e6c6dce..0000000
--- a/Scripts/TransformerExample.ipynb
+++ /dev/null
@@ -1,656 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import seaborn as sns\n",
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from numpy import argmax\n",
-    "\n",
-    "import string \n",
-    "import nltk\n",
-    "import re\n",
-    "import demoji\n",
-    "\n",
-    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
-    "from nltk.corpus import stopwords\n",
-    "from nltk.stem import WordNetLemmatizer\n",
-    "from nltk.corpus import wordnet\n",
-    "from spellchecker import SpellChecker\n",
-    "\n",
-    "from copy import deepcopy\n",
-    "from sklearn.base import clone\n",
-    "from sklearn.metrics import brier_score_loss\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.pipeline import Pipeline, FeatureUnion\n",
-    "from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer,TfidfTransformer\n",
-    "from sklearn.impute import SimpleImputer\n",
-    "from sklearn.base import BaseEstimator,TransformerMixin\n",
-    "\n",
-    "from imblearn.under_sampling import RandomUnderSampler\n",
-    "from imblearn.over_sampling import RandomOverSampler\n",
-    "\n",
-    "from joblib import parallel_backend\n",
-    "\n",
-    "from sklearn.svm import SVC, LinearSVC\n",
-    "from sklearn.naive_bayes import MultinomialNB, BernoulliNB, ComplementNB\n",
-    "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n",
-    "from sklearn.tree import DecisionTreeClassifier\n",
-    "from sklearn.neighbors import KNeighborsClassifier\n",
-    "\n",
-    "from sklearn.model_selection import RandomizedSearchCV\n",
-    "from skopt import BayesSearchCV\n",
-    "\n",
-    "from sklearn.calibration import CalibratedClassifierCV,calibration_curve\n",
-    "\n",
-    "from sklearn.metrics import confusion_matrix,f1_score,classification_report,precision_recall_curve\n",
-    "\n",
-    "from Modules.Transformers import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 1000 entries, 0 to 999\n",
-      "Data columns (total 26 columns):\n",
-      " #   Column           Non-Null Count  Dtype  \n",
-      "---  ------           --------------  -----  \n",
-      " 0   BIO_AUT          1000 non-null   bool   \n",
-      " 1   BIO_FOL          1000 non-null   int64  \n",
-      " 2   BIO_PER          1000 non-null   bool   \n",
-      " 3   BIO_PIC          1000 non-null   int64  \n",
-      " 4   BIO_POL          1000 non-null   bool   \n",
-      " 5   COMMENT          1000 non-null   object \n",
-      " 6   INTERACTION_COM  1000 non-null   int64  \n",
-      " 7   INTERACTION_NEM  1000 non-null   int64  \n",
-      " 8   INTERACTION_OEM  1000 non-null   float64\n",
-      " 9   INTERACTION_PEM  1000 non-null   int64  \n",
-      " 10  STATUS_ART       1000 non-null   float64\n",
-      " 11  STATUS_AVG       1000 non-null   float64\n",
-      " 12  STATUS_COM       1000 non-null   float64\n",
-      " 13  STATUS_EXT_PIC   1000 non-null   float64\n",
-      " 14  STATUS_EXT_VID   1000 non-null   float64\n",
-      " 15  STATUS_INT_PIC   1000 non-null   float64\n",
-      " 16  STATUS_INT_VID   1000 non-null   float64\n",
-      " 17  STATUS_NEM       1000 non-null   float64\n",
-      " 18  STATUS_OEM       1000 non-null   float64\n",
-      " 19  STATUS_OFF       1000 non-null   float64\n",
-      " 20  STATUS_PEM       1000 non-null   float64\n",
-      " 21  STATUS_POL       1000 non-null   float64\n",
-      " 22  STATUS_REM       1000 non-null   float64\n",
-      " 23  STATUS_SHA       1000 non-null   float64\n",
-      " 24  STATUS_STD       1000 non-null   float64\n",
-      " 25  TROLL            1000 non-null   bool   \n",
-      "dtypes: bool(4), float64(16), int64(5), object(1)\n",
-      "memory usage: 175.9+ KB\n"
-     ]
-    }
-   ],
-   "source": [
-    "df = pd.read_csv(\"./Datasets/Data.csv\", delimiter=\";\") # load data\n",
-    "df.info() "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X = df.iloc[:,1:] # comments\n",
-    "y = df.iloc[:,:1] # labels"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>BIO_AUT</th>\n",
-       "      <th>BIO_FOL</th>\n",
-       "      <th>BIO_PER</th>\n",
-       "      <th>BIO_PIC</th>\n",
-       "      <th>BIO_POL</th>\n",
-       "      <th>COMMENT</th>\n",
-       "      <th>INTERACTION_COM</th>\n",
-       "      <th>INTERACTION_NEM</th>\n",
-       "      <th>INTERACTION_OEM</th>\n",
-       "      <th>INTERACTION_PEM</th>\n",
-       "      <th>...</th>\n",
-       "      <th>STATUS_STD</th>\n",
-       "      <th>TEXT_AVG_SENTENCE_LENGTH</th>\n",
-       "      <th>TEXT_AVG_WORD_LENGTH</th>\n",
-       "      <th>TEXT_FREQWORDS</th>\n",
-       "      <th>TEXT_STOPWORDS</th>\n",
-       "      <th>TEXT_VERBS</th>\n",
-       "      <th>TEXT_WORDS</th>\n",
-       "      <th>TOXIC_PHRASES</th>\n",
-       "      <th>TOXIC_WORDS</th>\n",
-       "      <th>TROLL</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>False</td>\n",
-       "      <td>820</td>\n",
-       "      <td>False</td>\n",
-       "      <td>135</td>\n",
-       "      <td>True</td>\n",
-       "      <td>time truth need exposed regardless messenger</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>4.142857</td>\n",
-       "      <td>2</td>\n",
-       "      <td>4</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>True</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>False</td>\n",
-       "      <td>0</td>\n",
-       "      <td>True</td>\n",
-       "      <td>6</td>\n",
-       "      <td>False</td>\n",
-       "      <td>broke every promise made  trump problem</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.068367</td>\n",
-       "      <td>16.0</td>\n",
-       "      <td>3.500000</td>\n",
-       "      <td>1</td>\n",
-       "      <td>6</td>\n",
-       "      <td>2</td>\n",
-       "      <td>16</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>False</td>\n",
-       "      <td>0</td>\n",
-       "      <td>True</td>\n",
-       "      <td>3</td>\n",
-       "      <td>False</td>\n",
-       "      <td>joe please please please pay attention happeni...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>6</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.401628</td>\n",
-       "      <td>38.0</td>\n",
-       "      <td>3.956140</td>\n",
-       "      <td>11</td>\n",
-       "      <td>59</td>\n",
-       "      <td>22</td>\n",
-       "      <td>114</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>False</td>\n",
-       "      <td>0</td>\n",
-       "      <td>True</td>\n",
-       "      <td>2</td>\n",
-       "      <td>False</td>\n",
-       "      <td>joe color u voter  matter buy gas food hurt ec...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>15</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.097033</td>\n",
-       "      <td>22.0</td>\n",
-       "      <td>3.227273</td>\n",
-       "      <td>1</td>\n",
-       "      <td>9</td>\n",
-       "      <td>2</td>\n",
-       "      <td>22</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>False</td>\n",
-       "      <td>0</td>\n",
-       "      <td>True</td>\n",
-       "      <td>7</td>\n",
-       "      <td>True</td>\n",
-       "      <td>diesel almost  gallon end site yeah joe s beauty</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>10</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.107889</td>\n",
-       "      <td>10.5</td>\n",
-       "      <td>2.809524</td>\n",
-       "      <td>1</td>\n",
-       "      <td>6</td>\n",
-       "      <td>0</td>\n",
-       "      <td>21</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 43 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   BIO_AUT  BIO_FOL  BIO_PER  BIO_PIC  BIO_POL  \\\n",
-       "0    False      820    False      135     True   \n",
-       "1    False        0     True        6    False   \n",
-       "2    False        0     True        3    False   \n",
-       "3    False        0     True        2    False   \n",
-       "4    False        0     True        7     True   \n",
-       "\n",
-       "                                             COMMENT  INTERACTION_COM  \\\n",
-       "0      time truth need exposed regardless messenger                 0   \n",
-       "1           broke every promise made  trump problem                 1   \n",
-       "2  joe please please please pay attention happeni...                4   \n",
-       "3  joe color u voter  matter buy gas food hurt ec...                1   \n",
-       "4   diesel almost  gallon end site yeah joe s beauty                2   \n",
-       "\n",
-       "   INTERACTION_NEM  INTERACTION_OEM  INTERACTION_PEM  ...  STATUS_STD  \\\n",
-       "0                0              1.0                5  ...    0.000000   \n",
-       "1                0              0.0                8  ...    0.068367   \n",
-       "2                0              0.0                6  ...    0.401628   \n",
-       "3                0              0.0               15  ...    0.097033   \n",
-       "4                0              0.0               10  ...    0.107889   \n",
-       "\n",
-       "   TEXT_AVG_SENTENCE_LENGTH  TEXT_AVG_WORD_LENGTH  TEXT_FREQWORDS  \\\n",
-       "0                       7.0              4.142857               2   \n",
-       "1                      16.0              3.500000               1   \n",
-       "2                      38.0              3.956140              11   \n",
-       "3                      22.0              3.227273               1   \n",
-       "4                      10.5              2.809524               1   \n",
-       "\n",
-       "   TEXT_STOPWORDS  TEXT_VERBS  TEXT_WORDS  TOXIC_PHRASES  TOXIC_WORDS  TROLL  \n",
-       "0               4           2          14              0            0   True  \n",
-       "1               6           2          16              0            0  False  \n",
-       "2              59          22         114              0            0  False  \n",
-       "3               9           2          22              0            1  False  \n",
-       "4               6           0          21              0            0  False  \n",
-       "\n",
-       "[5 rows x 43 columns]"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dfFreqWords = pd.read_csv(\"./Datasets/FreqWords.csv\") # load data\n",
-    "dfBadPhrases = pd.read_csv(\"./Datasets/BadPhrases.csv\") # load data\n",
-    "dfBadWords = pd.read_csv(\"./Datasets/BadWords.csv\") # load data\n",
-    "\n",
-    "example = SentimentalTransformer(\"COMMENT\").transform(df[:100])   # Create example using 100 entries    \n",
-    "example = TextTransformer(\"COMMENT\",dfFreqWords[\"word\"].to_list()).transform(example)                      \n",
-    "example = ToxicTransformer(dfBadWords, dfBadPhrases, \"COMMENT\").transform(example) # Apply preprocessor class on comment\n",
-    "\n",
-    "example = example.reindex(sorted(example.columns), axis=1)\n",
-    "example.head() "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['BIO_AUT',\n",
-       " 'BIO_FOL',\n",
-       " 'BIO_PER',\n",
-       " 'BIO_PIC',\n",
-       " 'BIO_POL',\n",
-       " 'COMMENT',\n",
-       " 'INTERACTION_COM',\n",
-       " 'INTERACTION_NEM',\n",
-       " 'INTERACTION_OEM',\n",
-       " 'INTERACTION_PEM',\n",
-       " 'SENT_DIGITS',\n",
-       " 'SENT_EMOJIS',\n",
-       " 'SENT_GIFS',\n",
-       " 'SENT_HASHTAGS',\n",
-       " 'SENT_NAMES',\n",
-       " 'SENT_PICTURES',\n",
-       " 'SENT_SPECIAL_UNICODES',\n",
-       " 'SENT_UPPERCASE',\n",
-       " 'SENT_URLS',\n",
-       " 'STATUS_ART',\n",
-       " 'STATUS_AVG',\n",
-       " 'STATUS_COM',\n",
-       " 'STATUS_EXT_PIC',\n",
-       " 'STATUS_EXT_VID',\n",
-       " 'STATUS_INT_PIC',\n",
-       " 'STATUS_INT_VID',\n",
-       " 'STATUS_NEM',\n",
-       " 'STATUS_OEM',\n",
-       " 'STATUS_OFF',\n",
-       " 'STATUS_PEM',\n",
-       " 'STATUS_POL',\n",
-       " 'STATUS_REM',\n",
-       " 'STATUS_SHA',\n",
-       " 'STATUS_STD',\n",
-       " 'TEXT_AVG_SENTENCE_LENGTH',\n",
-       " 'TEXT_AVG_WORD_LENGTH',\n",
-       " 'TEXT_FREQWORDS',\n",
-       " 'TEXT_STOPWORDS',\n",
-       " 'TEXT_VERBS',\n",
-       " 'TEXT_WORDS',\n",
-       " 'TOXIC_PHRASES',\n",
-       " 'TOXIC_WORDS',\n",
-       " 'TROLL']"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "[\"BIO_AUT\",\n",
-    " \"BIO_FOL\",\n",
-    " \"BIO_PER\",\n",
-    " \"BIO_PIC\",\n",
-    " \"BIO_POL\",\n",
-    " \"COMMENT\",\n",
-    " \"INTERACTION_COM\",\n",
-    " \"INTERACTION_NEM\",\n",
-    " \"INTERACTION_OEM\",\n",
-    " \"INTERACTION_PEM\",\n",
-    " \"SENT_DIGITS\",\n",
-    " \"SENT_EMOJIS\",\n",
-    " \"SENT_GIFS\",\n",
-    " \"SENT_HASHTAGS\",\n",
-    " \"SENT_NAMES\",\n",
-    " \"SENT_PICTURES\",\n",
-    " \"SENT_SPECIAL_UNICODES\",\n",
-    " \"SENT_UPPERCASE\",\n",
-    " \"SENT_URLS\",\n",
-    " \"STATUS_ART\",\n",
-    " \"STATUS_AVG\",\n",
-    " \"STATUS_COM\",\n",
-    " \"STATUS_EXT_PIC\",\n",
-    " \"STATUS_EXT_VID\",\n",
-    " \"STATUS_INT_PIC\",\n",
-    " \"STATUS_INT_VID\",\n",
-    " \"STATUS_NEM\",\n",
-    " \"STATUS_OEM\",\n",
-    " \"STATUS_OFF\",\n",
-    " \"STATUS_PEM\",\n",
-    " \"STATUS_POL\",\n",
-    " \"STATUS_REM\",\n",
-    " \"STATUS_SHA\",\n",
-    " \"STATUS_STD\",\n",
-    " \"TEXT_AVG_SENTENCE_LENGTH\",\n",
-    " \"TEXT_AVG_WORD_LENGTH\",\n",
-    " \"TEXT_FREQWORDS\",\n",
-    " \"TEXT_STOPWORDS\",\n",
-    " \"TEXT_VERBS\",\n",
-    " \"TEXT_WORDS\",\n",
-    " \"TOXIC_PHRASES\",\n",
-    " \"TOXIC_WORDS\",\n",
-    " \"TROLL\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 100 entries, 0 to 99\n",
-      "Data columns (total 43 columns):\n",
-      " #   Column                    Non-Null Count  Dtype  \n",
-      "---  ------                    --------------  -----  \n",
-      " 0   BIO_AUT                   100 non-null    bool   \n",
-      " 1   BIO_FOL                   100 non-null    int64  \n",
-      " 2   BIO_PER                   100 non-null    bool   \n",
-      " 3   BIO_PIC                   100 non-null    int64  \n",
-      " 4   BIO_POL                   100 non-null    bool   \n",
-      " 5   COMMENT                   100 non-null    object \n",
-      " 6   INTERACTION_COM           100 non-null    int64  \n",
-      " 7   INTERACTION_NEM           100 non-null    int64  \n",
-      " 8   INTERACTION_OEM           100 non-null    float64\n",
-      " 9   INTERACTION_PEM           100 non-null    int64  \n",
-      " 10  SENT_DIGITS               100 non-null    int64  \n",
-      " 11  SENT_EMOJIS               100 non-null    int64  \n",
-      " 12  SENT_GIFS                 100 non-null    int64  \n",
-      " 13  SENT_HASHTAGS             100 non-null    int64  \n",
-      " 14  SENT_NAMES                100 non-null    int64  \n",
-      " 15  SENT_PICTURES             100 non-null    int64  \n",
-      " 16  SENT_SPECIAL_UNICODES     100 non-null    int64  \n",
-      " 17  SENT_UPPERCASE            100 non-null    int64  \n",
-      " 18  SENT_URLS                 100 non-null    int64  \n",
-      " 19  STATUS_ART                100 non-null    float64\n",
-      " 20  STATUS_AVG                100 non-null    float64\n",
-      " 21  STATUS_COM                100 non-null    float64\n",
-      " 22  STATUS_EXT_PIC            100 non-null    float64\n",
-      " 23  STATUS_EXT_VID            100 non-null    float64\n",
-      " 24  STATUS_INT_PIC            100 non-null    float64\n",
-      " 25  STATUS_INT_VID            100 non-null    float64\n",
-      " 26  STATUS_NEM                100 non-null    float64\n",
-      " 27  STATUS_OEM                100 non-null    float64\n",
-      " 28  STATUS_OFF                100 non-null    float64\n",
-      " 29  STATUS_PEM                100 non-null    float64\n",
-      " 30  STATUS_POL                100 non-null    float64\n",
-      " 31  STATUS_REM                100 non-null    float64\n",
-      " 32  STATUS_SHA                100 non-null    float64\n",
-      " 33  STATUS_STD                100 non-null    float64\n",
-      " 34  TEXT_AVG_SENTENCE_LENGTH  100 non-null    float64\n",
-      " 35  TEXT_AVG_WORD_LENGTH      100 non-null    float64\n",
-      " 36  TEXT_FREQWORDS            100 non-null    int64  \n",
-      " 37  TEXT_STOPWORDS            100 non-null    int64  \n",
-      " 38  TEXT_VERBS                100 non-null    int64  \n",
-      " 39  TEXT_WORDS                100 non-null    int64  \n",
-      " 40  TOXIC_PHRASES             100 non-null    int64  \n",
-      " 41  TOXIC_WORDS               100 non-null    int64  \n",
-      " 42  TROLL                     100 non-null    bool   \n",
-      "dtypes: bool(4), float64(18), int64(20), object(1)\n",
-      "memory usage: 31.0+ KB\n"
-     ]
-    }
-   ],
-   "source": [
-    "example.info()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dfComment = df[:1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def FreqWordCount(text):\n",
-    "    sentenceToken = nltk.sent_tokenize(text)\n",
-    "    counter = 0\n",
-    "    for sentence in sentenceToken:\n",
-    "        wordToken = nltk.word_tokenize(sentence)\n",
-    "        for _ , pos in nltk.pos_tag(wordToken):\n",
-    "            if pos in [\"VB\",\"VBG\",\"VBD\",\"VBN\",\"VBP\"]:\n",
-    "                counter += 1\n",
-    "    return counter"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "text = \"this is my text. and you are gnoomed. lool yes you are dog told\"\n",
-    "FreqWordCount(text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "F = FrequentWords(10,200)\n",
-    "example[\"COMMENT\"] = F.transform(example[\"COMMENT\"]) # Remove frequent words from comments\n",
-    "F.Show()    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cv = CountVectorizer(ngram_range = (2,3), max_features = 5000) # Using ngrams (bigrams and trigrams)\n",
-    "countMatrix = cv.fit_transform(example[\"COMMENT\"])             # Bag of words\n",
-    "countMatrix"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "countMatrix.toarray()[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cv.vocabulary_ # Display corpus "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Display bag of words\n",
-    "countTokens = cv.get_feature_names()\n",
-    "dfCount = pd.DataFrame(data=countMatrix.toarray(),columns=countTokens)\n",
-    "dfCount.head(10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Convert a collection of raw documents to a matrix of TF-IDF features\n",
-    "# Reflect how important a word is to a document in a collection or corpus.\n",
-    "tfid = TfidfTransformer() \n",
-    "tfidMatrix = tfid.fit_transform(countMatrix)\n",
-    "tfidMatrix"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create and display TF-IDF features\n",
-    "tfidCount = pd.DataFrame(data=tfidMatrix.toarray(),columns=countTokens) \n",
-    "tfidCount.head(10)"
-   ]
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "43f9e24b6ed1bb06037d4a0b70c9bd285081bf18f02f4b6444a4c7a3e23233e5"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.8.8 ('base')",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.8"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

	BIO_AUT	BIO_FOL	BIO_PER	BIO_PIC	BIO_POL	COMMENT	INTERACTION_COM	INTERACTION_OEM	INTERACTION_PEM	...	STATUS_STD	TEXT_AVG_SENTENCE_LENGTH	TEXT_AVG_WORD_LENGTH	TEXT_FREQWORDS	TEXT_STOPWORDS	TEXT_VERBS	TEXT_WORDS	TOXIC_WORDS	TROLL
0	False	820	False	135	True	time truth need exposed regardless messenger	0	1.0	5	...	0.000000	7.0	4.142857	2	4	2	14	0	True
1	False	0	True	6	False	broke every promise made trump problem	1	0.0	8	...	0.068367	16.0	3.500000	1	6	2	16	0	False
2	False	0	True	3	False	joe please please please pay attention happeni...	4	0.0	6	...	0.401628	38.0	3.956140	11	59	22	114	0	False
3	False	0	True	2	False	joe color u voter matter buy gas food hurt ec...	1	0.0	15	...	0.097033	22.0	3.227273	1	9	2	22	1	False
4	False	0	True	7	True	diesel almost gallon end site yeah joe s beauty	2	0.0	10	...	0.107889	10.5	2.809524	1	6	0	21	0	False