Skip to content
Permalink
b97b64ea62
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
2989 lines (2989 sloc) 383 KB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3d1395e5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>1. Consent: By clicking on the \"I Agree\" button below, you are indicating that you have read and understood the information provided above, that you voluntarily agree to participate in this study, and that you are at least 18 years of age.</th>\n",
" <th>2. I see Myself as Someone Who...</th>\n",
" <th>2.1. Is talkative</th>\n",
" <th>2.2. Tends to find fault with others</th>\n",
" <th>2.3. Does a thorough job</th>\n",
" <th>2.4. Is depressed, blue</th>\n",
" <th>2.5. Is original, comes up with new ideas</th>\n",
" <th>2.6. Is reserved</th>\n",
" <th>2.7. Is helpful and unselfish with others</th>\n",
" <th>2.8. Can be somewhat careless</th>\n",
" <th>...</th>\n",
" <th>2.36. Is outgoing, sociable</th>\n",
" <th>2.37. Is sometimes rude to others</th>\n",
" <th>2.38. Makes plans and follows through with them</th>\n",
" <th>2.39. Gets nervous easily</th>\n",
" <th>2.40. Likes to reflect, play with ideas</th>\n",
" <th>2.41. Has few artistic interests</th>\n",
" <th>2.42. Likes to cooperate with others</th>\n",
" <th>2.43. Is easily distracted</th>\n",
" <th>2.44. Is sophisticated in art, music, or literature</th>\n",
" <th>3. What is your favourite dance style in the context of dance battles from the ones in the list below:</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>Dancehall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>Dancehall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>Breaking</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>Vogue</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>I agree</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>88 rows × 47 columns</p>\n",
"</div>"
],
"text/plain": [
" 1. Consent: By clicking on the \"I Agree\" button below, you are indicating that you have read and understood the information provided above, that you voluntarily agree to participate in this study, and that you are at least 18 years of age. \\\n",
"0 I agree \n",
"1 I agree \n",
"2 I agree \n",
"3 I agree \n",
"4 I agree \n",
".. ... \n",
"83 I agree \n",
"84 I agree \n",
"85 I agree \n",
"86 I agree \n",
"87 I agree \n",
"\n",
" 2. I see Myself as Someone Who... 2.1. Is talkative \\\n",
"0 NaN 4.0 \n",
"1 NaN 4.0 \n",
"2 NaN NaN \n",
"3 NaN 5.0 \n",
"4 NaN NaN \n",
".. ... ... \n",
"83 NaN 3.0 \n",
"84 NaN 1.0 \n",
"85 NaN 4.0 \n",
"86 NaN 5.0 \n",
"87 NaN 4.0 \n",
"\n",
" 2.2. Tends to find fault with others 2.3. Does a thorough job \\\n",
"0 2.0 3.0 \n",
"1 3.0 2.0 \n",
"2 NaN NaN \n",
"3 2.0 2.0 \n",
"4 1.0 3.0 \n",
".. ... ... \n",
"83 4.0 3.0 \n",
"84 2.0 3.0 \n",
"85 2.0 3.0 \n",
"86 3.0 4.0 \n",
"87 2.0 3.0 \n",
"\n",
" 2.4. Is depressed, blue 2.5. Is original, comes up with new ideas \\\n",
"0 3.0 2.0 \n",
"1 3.0 4.0 \n",
"2 NaN NaN \n",
"3 2.0 4.0 \n",
"4 NaN 3.0 \n",
".. ... ... \n",
"83 4.0 3.0 \n",
"84 4.0 2.0 \n",
"85 2.0 5.0 \n",
"86 4.0 4.0 \n",
"87 2.0 3.0 \n",
"\n",
" 2.6. Is reserved 2.7. Is helpful and unselfish with others \\\n",
"0 3.0 5.0 \n",
"1 4.0 5.0 \n",
"2 NaN NaN \n",
"3 2.0 4.0 \n",
"4 5.0 5.0 \n",
".. ... ... \n",
"83 2.0 4.0 \n",
"84 5.0 3.0 \n",
"85 2.0 5.0 \n",
"86 2.0 3.0 \n",
"87 2.0 4.0 \n",
"\n",
" 2.8. Can be somewhat careless ... 2.36. Is outgoing, sociable \\\n",
"0 2.0 ... 4.0 \n",
"1 4.0 ... 4.0 \n",
"2 NaN ... NaN \n",
"3 5.0 ... 5.0 \n",
"4 3.0 ... 3.0 \n",
".. ... ... ... \n",
"83 3.0 ... 4.0 \n",
"84 2.0 ... 1.0 \n",
"85 4.0 ... 4.0 \n",
"86 3.0 ... 5.0 \n",
"87 4.0 ... 4.0 \n",
"\n",
" 2.37. Is sometimes rude to others \\\n",
"0 3.0 \n",
"1 3.0 \n",
"2 NaN \n",
"3 3.0 \n",
"4 3.0 \n",
".. ... \n",
"83 2.0 \n",
"84 3.0 \n",
"85 3.0 \n",
"86 2.0 \n",
"87 2.0 \n",
"\n",
" 2.38. Makes plans and follows through with them \\\n",
"0 2.0 \n",
"1 3.0 \n",
"2 NaN \n",
"3 3.0 \n",
"4 3.0 \n",
".. ... \n",
"83 5.0 \n",
"84 2.0 \n",
"85 1.0 \n",
"86 3.0 \n",
"87 4.0 \n",
"\n",
" 2.39. Gets nervous easily 2.40. Likes to reflect, play with ideas \\\n",
"0 4.0 2.0 \n",
"1 4.0 4.0 \n",
"2 NaN NaN \n",
"3 1.0 3.0 \n",
"4 3.0 3.0 \n",
".. ... ... \n",
"83 4.0 4.0 \n",
"84 3.0 1.0 \n",
"85 1.0 4.0 \n",
"86 3.0 3.0 \n",
"87 2.0 2.0 \n",
"\n",
" 2.41. Has few artistic interests 2.42. Likes to cooperate with others \\\n",
"0 2.0 5.0 \n",
"1 4.0 4.0 \n",
"2 NaN NaN \n",
"3 1.0 4.0 \n",
"4 3.0 5.0 \n",
".. ... ... \n",
"83 3.0 4.0 \n",
"84 4.0 2.0 \n",
"85 5.0 4.0 \n",
"86 1.0 4.0 \n",
"87 3.0 3.0 \n",
"\n",
" 2.43. Is easily distracted \\\n",
"0 4.0 \n",
"1 4.0 \n",
"2 NaN \n",
"3 4.0 \n",
"4 3.0 \n",
".. ... \n",
"83 5.0 \n",
"84 1.0 \n",
"85 5.0 \n",
"86 3.0 \n",
"87 4.0 \n",
"\n",
" 2.44. Is sophisticated in art, music, or literature \\\n",
"0 3.0 \n",
"1 4.0 \n",
"2 3.0 \n",
"3 3.0 \n",
"4 2.0 \n",
".. ... \n",
"83 4.0 \n",
"84 1.0 \n",
"85 5.0 \n",
"86 5.0 \n",
"87 2.0 \n",
"\n",
" 3. What is your favourite dance style in the context of dance battles from the ones in the list below: \n",
"0 House \n",
"1 Hip Hop \n",
"2 Dancehall \n",
"3 Dancehall \n",
"4 House \n",
".. ... \n",
"83 Breaking \n",
"84 Hip Hop \n",
"85 Vogue \n",
"86 Hip Hop \n",
"87 Hip Hop \n",
"\n",
"[88 rows x 47 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"Importing libraries and the dataset.\n",
"Personality/Dance Preferences Dataset\n",
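"6 attributes once the questionnaire items are aggregated: 5 predictor attributes (the Big Five personality traits) and a target variable (favourite dance style).\n",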
"\"\"\"\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"df = pd.read_csv('dataset.csv')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a1ab0d35",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2.1. Is talkative</th>\n",
" <th>2.2. Tends to find fault with others</th>\n",
" <th>2.3. Does a thorough job</th>\n",
" <th>2.4. Is depressed, blue</th>\n",
" <th>2.5. Is original, comes up with new ideas</th>\n",
" <th>2.6. Is reserved</th>\n",
" <th>2.7. Is helpful and unselfish with others</th>\n",
" <th>2.8. Can be somewhat careless</th>\n",
" <th>2.9. Is relaxed, handles stress well</th>\n",
" <th>2.10. Is curious about many different things</th>\n",
" <th>...</th>\n",
" <th>2.36. Is outgoing, sociable</th>\n",
" <th>2.37. Is sometimes rude to others</th>\n",
" <th>2.38. Makes plans and follows through with them</th>\n",
" <th>2.39. Gets nervous easily</th>\n",
" <th>2.40. Likes to reflect, play with ideas</th>\n",
" <th>2.41. Has few artistic interests</th>\n",
" <th>2.42. Likes to cooperate with others</th>\n",
" <th>2.43. Is easily distracted</th>\n",
" <th>2.44. Is sophisticated in art, music, or literature</th>\n",
" <th>3. What is your favourite dance style in the context of dance battles from the ones in the list below:</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>Dancehall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>Dancehall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>Breaking</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>Vogue</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>88 rows × 45 columns</p>\n",
"</div>"
],
"text/plain": [
" 2.1. Is talkative 2.2. Tends to find fault with others \\\n",
"0 4.0 2.0 \n",
"1 4.0 3.0 \n",
"2 NaN NaN \n",
"3 5.0 2.0 \n",
"4 NaN 1.0 \n",
".. ... ... \n",
"83 3.0 4.0 \n",
"84 1.0 2.0 \n",
"85 4.0 2.0 \n",
"86 5.0 3.0 \n",
"87 4.0 2.0 \n",
"\n",
" 2.3. Does a thorough job 2.4. Is depressed, blue \\\n",
"0 3.0 3.0 \n",
"1 2.0 3.0 \n",
"2 NaN NaN \n",
"3 2.0 2.0 \n",
"4 3.0 NaN \n",
".. ... ... \n",
"83 3.0 4.0 \n",
"84 3.0 4.0 \n",
"85 3.0 2.0 \n",
"86 4.0 4.0 \n",
"87 3.0 2.0 \n",
"\n",
" 2.5. Is original, comes up with new ideas 2.6. Is reserved \\\n",
"0 2.0 3.0 \n",
"1 4.0 4.0 \n",
"2 NaN NaN \n",
"3 4.0 2.0 \n",
"4 3.0 5.0 \n",
".. ... ... \n",
"83 3.0 2.0 \n",
"84 2.0 5.0 \n",
"85 5.0 2.0 \n",
"86 4.0 2.0 \n",
"87 3.0 2.0 \n",
"\n",
" 2.7. Is helpful and unselfish with others 2.8. Can be somewhat careless \\\n",
"0 5.0 2.0 \n",
"1 5.0 4.0 \n",
"2 NaN NaN \n",
"3 4.0 5.0 \n",
"4 5.0 3.0 \n",
".. ... ... \n",
"83 4.0 3.0 \n",
"84 3.0 2.0 \n",
"85 5.0 4.0 \n",
"86 3.0 3.0 \n",
"87 4.0 4.0 \n",
"\n",
" 2.9. Is relaxed, handles stress well \\\n",
"0 1.0 \n",
"1 3.0 \n",
"2 NaN \n",
"3 4.0 \n",
"4 3.0 \n",
".. ... \n",
"83 1.0 \n",
"84 5.0 \n",
"85 3.0 \n",
"86 3.0 \n",
"87 3.0 \n",
"\n",
" 2.10. Is curious about many different things ... \\\n",
"0 4.0 ... \n",
"1 4.0 ... \n",
"2 NaN ... \n",
"3 5.0 ... \n",
"4 5.0 ... \n",
".. ... ... \n",
"83 5.0 ... \n",
"84 2.0 ... \n",
"85 5.0 ... \n",
"86 4.0 ... \n",
"87 3.0 ... \n",
"\n",
" 2.36. Is outgoing, sociable 2.37. Is sometimes rude to others \\\n",
"0 4.0 3.0 \n",
"1 4.0 3.0 \n",
"2 NaN NaN \n",
"3 5.0 3.0 \n",
"4 3.0 3.0 \n",
".. ... ... \n",
"83 4.0 2.0 \n",
"84 1.0 3.0 \n",
"85 4.0 3.0 \n",
"86 5.0 2.0 \n",
"87 4.0 2.0 \n",
"\n",
" 2.38. Makes plans and follows through with them \\\n",
"0 2.0 \n",
"1 3.0 \n",
"2 NaN \n",
"3 3.0 \n",
"4 3.0 \n",
".. ... \n",
"83 5.0 \n",
"84 2.0 \n",
"85 1.0 \n",
"86 3.0 \n",
"87 4.0 \n",
"\n",
" 2.39. Gets nervous easily 2.40. Likes to reflect, play with ideas \\\n",
"0 4.0 2.0 \n",
"1 4.0 4.0 \n",
"2 NaN NaN \n",
"3 1.0 3.0 \n",
"4 3.0 3.0 \n",
".. ... ... \n",
"83 4.0 4.0 \n",
"84 3.0 1.0 \n",
"85 1.0 4.0 \n",
"86 3.0 3.0 \n",
"87 2.0 2.0 \n",
"\n",
" 2.41. Has few artistic interests 2.42. Likes to cooperate with others \\\n",
"0 2.0 5.0 \n",
"1 4.0 4.0 \n",
"2 NaN NaN \n",
"3 1.0 4.0 \n",
"4 3.0 5.0 \n",
".. ... ... \n",
"83 3.0 4.0 \n",
"84 4.0 2.0 \n",
"85 5.0 4.0 \n",
"86 1.0 4.0 \n",
"87 3.0 3.0 \n",
"\n",
" 2.43. Is easily distracted \\\n",
"0 4.0 \n",
"1 4.0 \n",
"2 NaN \n",
"3 4.0 \n",
"4 3.0 \n",
".. ... \n",
"83 5.0 \n",
"84 1.0 \n",
"85 5.0 \n",
"86 3.0 \n",
"87 4.0 \n",
"\n",
" 2.44. Is sophisticated in art, music, or literature \\\n",
"0 3.0 \n",
"1 4.0 \n",
"2 3.0 \n",
"3 3.0 \n",
"4 2.0 \n",
".. ... \n",
"83 4.0 \n",
"84 1.0 \n",
"85 5.0 \n",
"86 5.0 \n",
"87 2.0 \n",
"\n",
" 3. What is your favourite dance style in the context of dance battles from the ones in the list below: \n",
"0 House \n",
"1 Hip Hop \n",
"2 Dancehall \n",
"3 Dancehall \n",
"4 House \n",
".. ... \n",
"83 Breaking \n",
"84 Hip Hop \n",
"85 Vogue \n",
"86 Hip Hop \n",
"87 Hip Hop \n",
"\n",
"[88 rows x 45 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"In this section the data is preprocessed (cleaned).\n",
"\n",
"\"\"\"\n",
"# drop the first two columns as they are not part of the actual dataset to be analysed\n",
"df = df.iloc[:, 2:]\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e9f6a20f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 43\n",
"4 2\n",
"5 1\n",
"17 1\n",
"18 2\n",
"20 13\n",
"22 1\n",
"24 1\n",
"29 1\n",
"31 1\n",
"33 1\n",
"34 11\n",
"41 1\n",
"47 3\n",
"53 2\n",
"59 2\n",
"66 43\n",
"67 1\n",
"73 1\n",
"76 1\n",
"81 1\n",
"dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"SECTION 1: MISSING VALUES\n",
"NaN values affect the model accuracy and sampling\n",
"\"\"\"\n",
"# count the number of null values in each row that contains at least one null\n",
"null_counts = df[df.isnull().any(axis=1)].isnull().sum(axis=1)\n",
"null_counts"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d61dc0ab",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2.1. Is talkative</th>\n",
" <th>2.2. Tends to find fault with others</th>\n",
" <th>2.3. Does a thorough job</th>\n",
" <th>2.4. Is depressed, blue</th>\n",
" <th>2.5. Is original, comes up with new ideas</th>\n",
" <th>2.6. Is reserved</th>\n",
" <th>2.7. Is helpful and unselfish with others</th>\n",
" <th>2.8. Can be somewhat careless</th>\n",
" <th>2.9. Is relaxed, handles stress well</th>\n",
" <th>2.10. Is curious about many different things</th>\n",
" <th>...</th>\n",
" <th>2.36. Is outgoing, sociable</th>\n",
" <th>2.37. Is sometimes rude to others</th>\n",
" <th>2.38. Makes plans and follows through with them</th>\n",
" <th>2.39. Gets nervous easily</th>\n",
" <th>2.40. Likes to reflect, play with ideas</th>\n",
" <th>2.41. Has few artistic interests</th>\n",
" <th>2.42. Likes to cooperate with others</th>\n",
" <th>2.43. Is easily distracted</th>\n",
" <th>2.44. Is sophisticated in art, music, or literature</th>\n",
" <th>3. What is your favourite dance style in the context of dance battles from the ones in the list below:</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>Popping</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>Popping</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 45 columns</p>\n",
"</div>"
],
"text/plain": [
" 2.1. Is talkative 2.2. Tends to find fault with others \\\n",
"20 1.0 NaN \n",
"34 1.0 NaN \n",
"\n",
" 2.3. Does a thorough job 2.4. Is depressed, blue \\\n",
"20 1.0 NaN \n",
"34 1.0 NaN \n",
"\n",
" 2.5. Is original, comes up with new ideas 2.6. Is reserved \\\n",
"20 1.0 NaN \n",
"34 1.0 NaN \n",
"\n",
" 2.7. Is helpful and unselfish with others 2.8. Can be somewhat careless \\\n",
"20 1.0 NaN \n",
"34 1.0 NaN \n",
"\n",
" 2.9. Is relaxed, handles stress well \\\n",
"20 1.0 \n",
"34 1.0 \n",
"\n",
" 2.10. Is curious about many different things ... \\\n",
"20 1.0 ... \n",
"34 1.0 ... \n",
"\n",
" 2.36. Is outgoing, sociable 2.37. Is sometimes rude to others \\\n",
"20 1.0 NaN \n",
"34 1.0 NaN \n",
"\n",
" 2.38. Makes plans and follows through with them \\\n",
"20 NaN \n",
"34 1.0 \n",
"\n",
" 2.39. Gets nervous easily 2.40. Likes to reflect, play with ideas \\\n",
"20 1.0 1.0 \n",
"34 1.0 1.0 \n",
"\n",
" 2.41. Has few artistic interests 2.42. Likes to cooperate with others \\\n",
"20 1.0 1.0 \n",
"34 NaN 1.0 \n",
"\n",
" 2.43. Is easily distracted \\\n",
"20 1.0 \n",
"34 1.0 \n",
"\n",
" 2.44. Is sophisticated in art, music, or literature \\\n",
"20 1.0 \n",
"34 1.0 \n",
"\n",
" 3. What is your favourite dance style in the context of dance battles from the ones in the list below: \n",
"20 Popping \n",
"34 Popping \n",
"\n",
"[2 rows x 45 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# display the rows at positions 20 and 34 (identical to their index labels here, as no rows have been dropped yet) to inspect them, as they have many NaN values\n",
"\n",
"rows_to_display = [20, 34]\n",
"df_display = df.iloc[rows_to_display]\n",
"\n",
"df_display"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "15902c3b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4 2\n",
"5 1\n",
"17 1\n",
"18 2\n",
"22 1\n",
"24 1\n",
"29 1\n",
"31 1\n",
"33 1\n",
"41 1\n",
"47 3\n",
"53 2\n",
"59 2\n",
"67 1\n",
"73 1\n",
"76 1\n",
"81 1\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# drop rows with index labels 2 and 66 as 43 out of their 44 answers are NaN\n",
"# drop rows 20 and 34 as they seem to be faulty (13 and 11 of 44 answers are NaN, i.e. roughly 25-30%, and all the other values = 1.0)\n",
"df = df.drop([2, 66, 20, 34], axis=0)\n",
"\n",
"# recount the number of null values for verification\n",
"null_counts = df[df.isnull().any(axis=1)].isnull().sum(axis=1)\n",
"null_counts\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "51881feb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2.1. Is talkative</th>\n",
" <th>2.2. Tends to find fault with others</th>\n",
" <th>2.3. Does a thorough job</th>\n",
" <th>2.4. Is depressed, blue</th>\n",
" <th>2.5. Is original, comes up with new ideas</th>\n",
" <th>2.6. Is reserved</th>\n",
" <th>2.7. Is helpful and unselfish with others</th>\n",
" <th>2.8. Can be somewhat careless</th>\n",
" <th>2.9. Is relaxed, handles stress well</th>\n",
" <th>2.10. Is curious about many different things</th>\n",
" <th>...</th>\n",
" <th>2.36. Is outgoing, sociable</th>\n",
" <th>2.37. Is sometimes rude to others</th>\n",
" <th>2.38. Makes plans and follows through with them</th>\n",
" <th>2.39. Gets nervous easily</th>\n",
" <th>2.40. Likes to reflect, play with ideas</th>\n",
" <th>2.41. Has few artistic interests</th>\n",
" <th>2.42. Likes to cooperate with others</th>\n",
" <th>2.43. Is easily distracted</th>\n",
" <th>2.44. Is sophisticated in art, music, or literature</th>\n",
" <th>3. What is your favourite dance style in the context of dance battles from the ones in the list below:</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>Dancehall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>...</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 45 columns</p>\n",
"</div>"
],
"text/plain": [
" 2.1. Is talkative 2.2. Tends to find fault with others \\\n",
"0 4.0 2.0 \n",
"1 4.0 3.0 \n",
"3 5.0 2.0 \n",
"4 0.0 1.0 \n",
"5 1.0 1.0 \n",
"\n",
" 2.3. Does a thorough job 2.4. Is depressed, blue \\\n",
"0 3.0 3.0 \n",
"1 2.0 3.0 \n",
"3 2.0 2.0 \n",
"4 3.0 0.0 \n",
"5 5.0 1.0 \n",
"\n",
" 2.5. Is original, comes up with new ideas 2.6. Is reserved \\\n",
"0 2.0 3.0 \n",
"1 4.0 4.0 \n",
"3 4.0 2.0 \n",
"4 3.0 5.0 \n",
"5 3.0 5.0 \n",
"\n",
" 2.7. Is helpful and unselfish with others 2.8. Can be somewhat careless \\\n",
"0 5.0 2.0 \n",
"1 5.0 4.0 \n",
"3 4.0 5.0 \n",
"4 5.0 3.0 \n",
"5 3.0 4.0 \n",
"\n",
" 2.9. Is relaxed, handles stress well \\\n",
"0 1.0 \n",
"1 3.0 \n",
"3 4.0 \n",
"4 3.0 \n",
"5 3.0 \n",
"\n",
" 2.10. Is curious about many different things ... \\\n",
"0 4.0 ... \n",
"1 4.0 ... \n",
"3 5.0 ... \n",
"4 5.0 ... \n",
"5 5.0 ... \n",
"\n",
" 2.36. Is outgoing, sociable 2.37. Is sometimes rude to others \\\n",
"0 4.0 3.0 \n",
"1 4.0 3.0 \n",
"3 5.0 3.0 \n",
"4 3.0 3.0 \n",
"5 4.0 1.0 \n",
"\n",
" 2.38. Makes plans and follows through with them 2.39. Gets nervous easily \\\n",
"0 2.0 4.0 \n",
"1 3.0 4.0 \n",
"3 3.0 1.0 \n",
"4 3.0 3.0 \n",
"5 4.0 4.0 \n",
"\n",
" 2.40. Likes to reflect, play with ideas 2.41. Has few artistic interests \\\n",
"0 2.0 2.0 \n",
"1 4.0 4.0 \n",
"3 3.0 1.0 \n",
"4 3.0 3.0 \n",
"5 4.0 1.0 \n",
"\n",
" 2.42. Likes to cooperate with others 2.43. Is easily distracted \\\n",
"0 5.0 4.0 \n",
"1 4.0 4.0 \n",
"3 4.0 4.0 \n",
"4 5.0 3.0 \n",
"5 3.0 3.0 \n",
"\n",
" 2.44. Is sophisticated in art, music, or literature \\\n",
"0 3.0 \n",
"1 4.0 \n",
"3 3.0 \n",
"4 2.0 \n",
"5 5.0 \n",
"\n",
" 3. What is your favourite dance style in the context of dance battles from the ones in the list below: \n",
"0 House \n",
"1 Hip Hop \n",
"3 Dancehall \n",
"4 House \n",
"5 Hip Hop \n",
"\n",
"[5 rows x 45 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"The oher NaN values will not affect the results considerabily\n",
"Approach: will not be taken in consideration when calculating personality indexes as they are all below 3/44 NaN records each.\n",
"The formula that needs to be applied according to Personality-BigFiveInventory is a simple addition, so the NaN should be replaced by 0 in order to stay neutral.\n",
"\"\"\"\n",
"\n",
"df.fillna(value=0.0, inplace=True)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a0b307e9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\DellG3\\AppData\\Local\\Temp\\ipykernel_15844\\3029970098.py:23: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n",
" 'Openness': df.iloc[:, openness_cols].sum(axis=1),\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Extraversion</th>\n",
" <th>Agreeableness</th>\n",
" <th>Conscientiousness</th>\n",
" <th>Neuroticism</th>\n",
" <th>Openness</th>\n",
" <th>Dance_Style</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>27.0</td>\n",
" <td>30.0</td>\n",
" <td>29.0</td>\n",
" <td>27.0</td>\n",
" <td>32.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>32.0</td>\n",
" <td>31.0</td>\n",
" <td>33.0</td>\n",
" <td>34.0</td>\n",
" <td>37.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>26.0</td>\n",
" <td>30.0</td>\n",
" <td>24.0</td>\n",
" <td>29.0</td>\n",
" <td>30.0</td>\n",
" <td>Dancehall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>22.0</td>\n",
" <td>26.0</td>\n",
" <td>18.0</td>\n",
" <td>24.0</td>\n",
" <td>33.0</td>\n",
" <td>House</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>22.0</td>\n",
" <td>34.0</td>\n",
" <td>27.0</td>\n",
" <td>31.0</td>\n",
" <td>31.0</td>\n",
" <td>Hip Hop</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Extraversion Agreeableness Conscientiousness Neuroticism Openness \\\n",
"0 27.0 30.0 29.0 27.0 32.0 \n",
"1 32.0 31.0 33.0 34.0 37.0 \n",
"3 26.0 30.0 24.0 29.0 30.0 \n",
"4 22.0 26.0 18.0 24.0 33.0 \n",
"5 22.0 34.0 27.0 31.0 31.0 \n",
"\n",
" Dance_Style \n",
"0 House \n",
"1 Hip Hop \n",
"3 Dancehall \n",
"4 House \n",
"5 Hip Hop "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"SECTION 2: FORMULA\n",
"Extraversion: 1, 6R, 11, 16, 21R, 26, 31R, 36 \n",
"Agreeableness: 2R, 7, 12R, 17, 22, 27R, 32, 37R, 42 \n",
"Conscientiousness: 3, 8R, 13, 18R, 23R, 28, 33, 38, 43R \n",
"Neuroticism: 4, 9R, 14, 19, 24R, 29, 34R, 39 \n",
"Openness: 5, 10, 15, 20, 25, 30, 35R, 40, 41R, 44\n",
"\n",
"\"\"\"\n",
"# Define the column groups\n",
"extraversion_cols = [1, 6, 11, 16, 21, 26, 31, 36]\n",
"agreeableness_cols = [2, 7, 12, 17, 22, 27, 32, 37, 42]\n",
"conscientiousness_cols = [3, 8, 13, 18, 23, 28, 33, 38, 43]\n",
"neuroticism_cols = [4, 9, 14, 19, 24, 29, 34, 39]\n",
"openness_cols = [5, 10, 15, 20, 25, 30, 35, 40, 41, 44]\n",
"\n",
"# Create a new DataFrame with the sum of the specified columns for each category\n",
"category_df = pd.DataFrame({\n",
" 'Extraversion': df.iloc[:, extraversion_cols].sum(axis=1),\n",
" 'Agreeableness': df.iloc[:, agreeableness_cols].sum(axis=1),\n",
" 'Conscientiousness': df.iloc[:, conscientiousness_cols].sum(axis=1),\n",
" 'Neuroticism': df.iloc[:, neuroticism_cols].sum(axis=1),\n",
" 'Openness': df.iloc[:, openness_cols].sum(axis=1),\n",
" 'Dance_Style': df.iloc[:,-1]\n",
"})\n",
"\n",
"category_df.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "582781d4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Extraversion</th>\n",
" <th>Agreeableness</th>\n",
" <th>Conscientiousness</th>\n",
" <th>Neuroticism</th>\n",
" <th>Openness</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.193076</td>\n",
" <td>-0.258444</td>\n",
" <td>-0.031753</td>\n",
" <td>-0.976851</td>\n",
" <td>0.068336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.403400</td>\n",
" <td>-0.034637</td>\n",
" <td>0.857323</td>\n",
" <td>0.441392</td>\n",
" <td>1.093373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.048989</td>\n",
" <td>-0.258444</td>\n",
" <td>-1.143097</td>\n",
" <td>-0.571639</td>\n",
" <td>-0.341679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-1.017249</td>\n",
" <td>-1.153674</td>\n",
" <td>-2.476711</td>\n",
" <td>-1.584670</td>\n",
" <td>0.273343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-1.017249</td>\n",
" <td>0.636786</td>\n",
" <td>-0.476290</td>\n",
" <td>-0.166427</td>\n",
" <td>-0.136672</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>0.435140</td>\n",
" <td>1.532016</td>\n",
" <td>0.857323</td>\n",
" <td>-0.369033</td>\n",
" <td>0.068336</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>-1.501379</td>\n",
" <td>-2.496519</td>\n",
" <td>-0.698559</td>\n",
" <td>-3.610732</td>\n",
" <td>-1.161708</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>0.677205</td>\n",
" <td>0.189171</td>\n",
" <td>0.635054</td>\n",
" <td>0.441392</td>\n",
" <td>-0.341679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>-0.775184</td>\n",
" <td>-0.482252</td>\n",
" <td>1.079592</td>\n",
" <td>-0.774245</td>\n",
" <td>-0.341679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>-1.501379</td>\n",
" <td>0.189171</td>\n",
" <td>-0.698559</td>\n",
" <td>-1.787276</td>\n",
" <td>-1.161708</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>84 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" Extraversion Agreeableness Conscientiousness Neuroticism Openness\n",
"0 0.193076 -0.258444 -0.031753 -0.976851 0.068336\n",
"1 1.403400 -0.034637 0.857323 0.441392 1.093373\n",
"2 -0.048989 -0.258444 -1.143097 -0.571639 -0.341679\n",
"3 -1.017249 -1.153674 -2.476711 -1.584670 0.273343\n",
"4 -1.017249 0.636786 -0.476290 -0.166427 -0.136672\n",
".. ... ... ... ... ...\n",
"79 0.435140 1.532016 0.857323 -0.369033 0.068336\n",
"80 -1.501379 -2.496519 -0.698559 -3.610732 -1.161708\n",
"81 0.677205 0.189171 0.635054 0.441392 -0.341679\n",
"82 -0.775184 -0.482252 1.079592 -0.774245 -0.341679\n",
"83 -1.501379 0.189171 -0.698559 -1.787276 -1.161708\n",
"\n",
"[84 rows x 5 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"SECTION 3: Scaling the numerical features using StandardScaler\n",
"\"\"\"\n",
"X = category_df.loc[:, category_df.columns != 'Dance_Style'] # select all columns except target\n",
"y = category_df['Dance_Style'] # select only the target column\n",
"\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(X)\n",
"\n",
"column_names =['Extraversion','Agreeableness','Conscientiousness','Neuroticism','Openness']\n",
"df_scaled = pd.DataFrame(X_scaled, columns=column_names)\n",
"df_scaled "
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3e95c3e5",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\"\"\"\n",
"SECION 4: Encoding the target variable\n",
"\"\"\"\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"target_counts = y.value_counts()\n",
"sns.barplot(x=target_counts.index, y=target_counts.values)\n",
"plt.title('Class Distribution of Dance Styles')\n",
"plt.xlabel('Dance Styles')\n",
"plt.ylabel('Count')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "13d6abf5",
"metadata": {},
"outputs": [],
"source": [
"#combining Locking and Popping\n",
"y= y.replace({'Popping': 'Popping/Locking', 'Locking': 'Popping/Locking'})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "533107a2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Breaking</th>\n",
" <th>Dancehall</th>\n",
" <th>Hip Hop</th>\n",
" <th>House</th>\n",
" <th>Popping/Locking</th>\n",
" <th>Vogue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>84 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Breaking Dancehall Hip Hop House Popping/Locking Vogue\n",
"0 0 0 0 1 0 0\n",
"1 0 0 1 0 0 0\n",
"3 0 1 0 0 0 0\n",
"4 0 0 0 1 0 0\n",
"5 0 0 1 0 0 0\n",
".. ... ... ... ... ... ...\n",
"83 1 0 0 0 0 0\n",
"84 0 0 1 0 0 0\n",
"85 0 0 0 0 0 1\n",
"86 0 0 1 0 0 0\n",
"87 0 0 1 0 0 0\n",
"\n",
"[84 rows x 6 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#encoding the target text variables using One Hot Encoder \n",
"df_encoded = pd.get_dummies(y)\n",
"df_encoded"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e1e72352",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Breaking</th>\n",
" <th>Dancehall</th>\n",
" <th>Hip Hop</th>\n",
" <th>House</th>\n",
" <th>Popping/Locking</th>\n",
" <th>Vogue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>84 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Breaking Dancehall Hip Hop House Popping/Locking Vogue\n",
"0 0 0 0 1 0 0\n",
"1 0 0 1 0 0 0\n",
"2 0 1 0 0 0 0\n",
"3 0 0 0 1 0 0\n",
"4 0 0 1 0 0 0\n",
".. ... ... ... ... ... ...\n",
"79 1 0 0 0 0 0\n",
"80 0 0 1 0 0 0\n",
"81 0 0 0 0 0 1\n",
"82 0 0 1 0 0 0\n",
"83 0 0 1 0 0 0\n",
"\n",
"[84 rows x 6 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#reset the index that was originally affected by the dropped columns\n",
"df_encoded = df_encoded.reset_index(drop=True)\n",
"df_encoded"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "518cc734",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Breaking</th>\n",
" <th>Dancehall</th>\n",
" <th>Hip Hop</th>\n",
" <th>House</th>\n",
" <th>Vogue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>84 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" Breaking Dancehall Hip Hop House Vogue\n",
"0 0 0 0 1 0\n",
"1 0 0 1 0 0\n",
"2 0 1 0 0 0\n",
"3 0 0 0 1 0\n",
"4 0 0 1 0 0\n",
".. ... ... ... ... ...\n",
"79 1 0 0 0 0\n",
"80 0 0 1 0 0\n",
"81 0 0 0 0 1\n",
"82 0 0 1 0 0\n",
"83 0 0 1 0 0\n",
"\n",
"[84 rows x 5 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_encoded = df_encoded.drop('Popping/Locking', axis=1 )\n",
"df_encoded"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "abaa125f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Extraversion</th>\n",
" <th>Agreeableness</th>\n",
" <th>Conscientiousness</th>\n",
" <th>Neuroticism</th>\n",
" <th>Openness</th>\n",
" <th>Breaking</th>\n",
" <th>Dancehall</th>\n",
" <th>Hip Hop</th>\n",
" <th>House</th>\n",
" <th>Vogue</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.193076</td>\n",
" <td>-0.258444</td>\n",
" <td>-0.031753</td>\n",
" <td>-0.976851</td>\n",
" <td>0.068336</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.403400</td>\n",
" <td>-0.034637</td>\n",
" <td>0.857323</td>\n",
" <td>0.441392</td>\n",
" <td>1.093373</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.048989</td>\n",
" <td>-0.258444</td>\n",
" <td>-1.143097</td>\n",
" <td>-0.571639</td>\n",
" <td>-0.341679</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-1.017249</td>\n",
" <td>-1.153674</td>\n",
" <td>-2.476711</td>\n",
" <td>-1.584670</td>\n",
" <td>0.273343</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-1.017249</td>\n",
" <td>0.636786</td>\n",
" <td>-0.476290</td>\n",
" <td>-0.166427</td>\n",
" <td>-0.136672</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>0.435140</td>\n",
" <td>1.532016</td>\n",
" <td>0.857323</td>\n",
" <td>-0.369033</td>\n",
" <td>0.068336</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>-1.501379</td>\n",
" <td>-2.496519</td>\n",
" <td>-0.698559</td>\n",
" <td>-3.610732</td>\n",
" <td>-1.161708</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>0.677205</td>\n",
" <td>0.189171</td>\n",
" <td>0.635054</td>\n",
" <td>0.441392</td>\n",
" <td>-0.341679</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>-0.775184</td>\n",
" <td>-0.482252</td>\n",
" <td>1.079592</td>\n",
" <td>-0.774245</td>\n",
" <td>-0.341679</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>-1.501379</td>\n",
" <td>0.189171</td>\n",
" <td>-0.698559</td>\n",
" <td>-1.787276</td>\n",
" <td>-1.161708</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>84 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" Extraversion Agreeableness Conscientiousness Neuroticism Openness \\\n",
"0 0.193076 -0.258444 -0.031753 -0.976851 0.068336 \n",
"1 1.403400 -0.034637 0.857323 0.441392 1.093373 \n",
"2 -0.048989 -0.258444 -1.143097 -0.571639 -0.341679 \n",
"3 -1.017249 -1.153674 -2.476711 -1.584670 0.273343 \n",
"4 -1.017249 0.636786 -0.476290 -0.166427 -0.136672 \n",
".. ... ... ... ... ... \n",
"79 0.435140 1.532016 0.857323 -0.369033 0.068336 \n",
"80 -1.501379 -2.496519 -0.698559 -3.610732 -1.161708 \n",
"81 0.677205 0.189171 0.635054 0.441392 -0.341679 \n",
"82 -0.775184 -0.482252 1.079592 -0.774245 -0.341679 \n",
"83 -1.501379 0.189171 -0.698559 -1.787276 -1.161708 \n",
"\n",
" Breaking Dancehall Hip Hop House Vogue \n",
"0 0 0 0 1 0 \n",
"1 0 0 1 0 0 \n",
"2 0 1 0 0 0 \n",
"3 0 0 0 1 0 \n",
"4 0 0 1 0 0 \n",
".. ... ... ... ... ... \n",
"79 1 0 0 0 0 \n",
"80 0 0 1 0 0 \n",
"81 0 0 0 0 1 \n",
"82 0 0 1 0 0 \n",
"83 0 0 1 0 0 \n",
"\n",
"[84 rows x 10 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df= pd.concat([df_scaled, df_encoded], axis=1)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "5f3f1779",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 6000x4000 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\"\"\"\n",
"SECTION 5: Checking corelation\n",
"\"\"\"\n",
"#Create a corelation matrix to check the relation between Target and other Cols\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"plt.subplots(figsize=(60, 40))\n",
"sns.heatmap(df.corr(), annot = True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "6fa78d00",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"SECTION 6: Splitting into Train/Test\n",
"\"\"\"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"x = df.iloc[:, :5] \n",
"y = df.iloc[:, 5:] \n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8e15737c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.11538461538461539\n"
]
}
],
"source": [
"\"\"\"\n",
"In this section the data is being processed using Decision Tree\n",
"\"\"\"\n",
"\"\"\"\n",
"MODEL 1: SVC with cost sensitive approach\n",
"\"\"\"\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Compute the class weights based on the inverse of the class frequencies\n",
"class_weights = (1 / np.sum(y_train, axis=0)).to_dict()\n",
"\n",
"class_names = y_train.columns.tolist()\n",
"class_dict = {}\n",
"for i in range(len(class_names)):\n",
" class_dict[i] = class_weights[class_names[i]]\n",
" \n",
"# Create a SVM classifier with cost-sensitive approach\n",
"classifier = SVC(class_weight=class_dict)\n",
"\n",
"# Convert one-hot encoded target variable to array of class labels\n",
"y_train_labels = np.argmax(y_train.values, axis=1)\n",
"y_test_labels = np.argmax(y_test.values, axis=1)\n",
"y_train_labels = y_train_labels.ravel()\n",
"y_test_labels = y_test_labels.ravel()\n",
"\n",
"# Train the classifier using the training data\n",
"classifier.fit(X_train, y_train_labels)\n",
"\n",
"# Make predictions on the testing data\n",
"y_pred_labels = classifier.predict(X_test)\n",
"\n",
"# Compute the accuracy of the classifier\n",
"accuracy = accuracy_score(y_test_labels, y_pred_labels)\n",
"print(\"Accuracy:\", accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "72e5183a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.11538461538461539\n"
]
}
],
"source": [
"\"\"\"\n",
"MODEL 2: SVC with cost sensitive approach and OVO\n",
"\"\"\"\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Compute the class weights based on the inverse of the class frequencies\n",
"class_weights = (1 / np.sum(y_train, axis=0)).to_dict()\n",
"\n",
"class_names = y_train.columns.tolist()\n",
"class_dict = {}\n",
"for i in range(len(class_names)):\n",
" class_dict[i] = class_weights[class_names[i]]\n",
" \n",
"# Create a SVM classifier with cost-sensitive approach\n",
"classifier = SVC(class_weight=class_dict, decision_function_shape='ovo')\n",
"\n",
"# Convert one-hot encoded target variable to array of class labels\n",
"y_train_labels = np.argmax(y_train.values, axis=1)\n",
"y_test_labels = np.argmax(y_test.values, axis=1)\n",
"y_train_labels = y_train_labels.ravel()\n",
"y_test_labels = y_test_labels.ravel()\n",
"\n",
"# Train the classifier using the training data\n",
"classifier.fit(X_train, y_train_labels)\n",
"\n",
"# Make predictions on the testing data\n",
"y_pred_labels = classifier.predict(X_test)\n",
"\n",
"# Compute the accuracy of the classifier\n",
"accuracy = accuracy_score(y_test_labels, y_pred_labels)\n",
"print(\"Accuracy:\", accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "19ee093a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters: {'C': 100, 'degree': 2, 'gamma': 'auto', 'kernel': 'rbf'}\n",
"Accuracy: 0.37982456140350873\n"
]
}
],
"source": [
"\"\"\"\n",
"Hypertuning will be performed to enchance the accuraccy\n",
"\"\"\"\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier\n",
"\n",
"y_train_labels = np.argmax(y_train.values, axis=1)\n",
"y_test_labels = np.argmax(y_test.values, axis=1)\n",
"y_train_labels = y_train_labels.ravel()\n",
"y_test_labels = y_test_labels.ravel()\n",
"\n",
"# Compute the class weights based on the inverse of the class frequencies\n",
"class_weights = (1 / np.sum(y_train, axis=0)).to_dict()\n",
"\n",
"class_names = y_train.columns.tolist()\n",
"class_dict = {}\n",
"for i in range(len(class_names)):\n",
" class_dict[i] = class_weights[class_names[i]]\n",
"\n",
"param_grid = {'C': [0.1, 1, 10, 100],\n",
" 'kernel': ['linear', 'rbf', 'poly'],\n",
" 'degree': [2, 3, 4],\n",
" 'gamma': ['scale', 'auto']}\n",
"\n",
"svc = SVC(class_weight=class_dict)\n",
"\n",
"grid = GridSearchCV(estimator=svc, param_grid=param_grid, cv=3)\n",
"grid.fit(X_train, y_train_labels)\n",
"\n",
"print(\"Best parameters:\", grid.best_params_)\n",
"print(\"Accuracy:\", grid.best_score_)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "dbf54c5a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Accuracy: 0.46\n",
"\n",
"Micro Precision: 0.46\n",
"Micro Recall: 0.46\n",
"Micro F1-score: 0.46\n",
"\n",
"Macro Precision: 0.36\n",
"Macro Recall: 0.33\n",
"Macro F1-score: 0.30\n",
"\n",
"Weighted Precision: 0.43\n",
"Weighted Recall: 0.46\n",
"Weighted F1-score: 0.42\n",
"\n",
"Classification Report\n",
"\n",
" precision recall f1-score support\n",
"\n",
" Breaking 1.00 0.33 0.50 3\n",
" Dancehall 0.29 0.67 0.40 3\n",
" Hip Hop 0.53 0.64 0.58 14\n",
" House 0.00 0.00 0.00 2\n",
" Vogue 0.00 0.00 0.00 4\n",
"\n",
" accuracy 0.46 26\n",
" macro avg 0.36 0.33 0.30 26\n",
"weighted avg 0.43 0.46 0.42 26\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\DellG3\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"C:\\Users\\DellG3\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"C:\\Users\\DellG3\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"C:\\Users\\DellG3\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n",
"C:\\Users\\DellG3\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\"\"\"\n",
"Best model fined by hypertuning - Precision, Recall and F1 score analysis \n",
"acc = accuracy_score(y_test_labels, y_pred_labels)\n",
"\"\"\"\n",
"classifier = SVC(class_weight=class_dict, C=100, degree=2, gamma='auto', kernel='rbf' )\n",
"\n",
"# Convert one-hot encoded target variable to array of class labels\n",
"y_train_labels = np.argmax(y_train.values, axis=1)\n",
"y_test_labels = np.argmax(y_test.values, axis=1)\n",
"y_train_labels = y_train_labels.ravel()\n",
"y_test_labels = y_test_labels.ravel()\n",
"\n",
"classifier.fit(X_train, y_train_labels)\n",
"y_pred_labels = classifier.predict(X_test)\n",
"\n",
"# Confusion matrix evaluation\n",
"from sklearn.metrics import confusion_matrix\n",
"confusion= confusion_matrix(y_test_labels, y_pred_labels)\n",
"confusion_matrix_plot=sns.heatmap(confusion, annot=True, xticklabels=['Breaking','Dancehall','Hip Hop','House', 'Vogue'], yticklabels=['Breaking','Dancehall','Hip Hop','House', 'Vogue'])\n",
"#confusion_matrix_plot.set(xlabel='Actual Values', ylabel='Predicted Values')\n",
"\n",
"#importing accuracy_score, precision_score, recall_score, f1_score\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
"print('\\nAccuracy: {:.2f}\\n'.format(accuracy_score(y_test_labels, y_pred_labels)))\n",
"\n",
"print('Micro Precision: {:.2f}'.format(precision_score(y_test_labels, y_pred_labels, average='micro')))\n",
"print('Micro Recall: {:.2f}'.format(recall_score(y_test_labels, y_pred_labels, average='micro')))\n",
"print('Micro F1-score: {:.2f}\\n'.format(f1_score(y_test_labels, y_pred_labels, average='micro')))\n",
"\n",
"print('Macro Precision: {:.2f}'.format(precision_score(y_test_labels, y_pred_labels, average='macro')))\n",
"print('Macro Recall: {:.2f}'.format(recall_score(y_test_labels, y_pred_labels, average='macro')))\n",
"print('Macro F1-score: {:.2f}\\n'.format(f1_score(y_test_labels, y_pred_labels, average='macro')))\n",
"\n",
"print('Weighted Precision: {:.2f}'.format(precision_score(y_test_labels, y_pred_labels, average='weighted')))\n",
"print('Weighted Recall: {:.2f}'.format(recall_score(y_test_labels, y_pred_labels, average='weighted')))\n",
"print('Weighted F1-score: {:.2f}'.format(f1_score(y_test_labels, y_pred_labels, average='weighted')))\n",
"\n",
"from sklearn.metrics import classification_report\n",
"print('\\nClassification Report\\n')\n",
"print(classification_report(y_test_labels, y_pred_labels, target_names=['Breaking','Dancehall','Hip Hop','House', 'Vogue']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "270fd370",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}