Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
303COM-Artificial-Neural-Network-Network-Intrusion-Detection-System/ANN-NIDS.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
83 lines (79 sloc)
3.41 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import glob | |
import os | |
import re | |
import numpy | |
import pandas | |
import tensorflow as tf | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import LabelEncoder, StandardScaler | |
# ---- Data loading and preprocessing (CICIDS2017 CSV dump) ----
# Recursively collect every CSV under the dataset directory.
# os.path.join fixes the original pattern "dir**/*.csv", which was missing
# the path separator between the directory and the recursive glob.
csvpath = "C:/Users/thetr/Downloads/MachineLearningCVE"
csv_files = glob.glob(os.path.join(csvpath, "**", "*.csv"), recursive=True)
# Read each CSV and combine into one table.
# keep=False drops *every* copy of a duplicated row, not just the extras.
dataset = pandas.concat(pandas.read_csv(f) for f in csv_files).drop_duplicates(keep=False)
dataset.reset_index(drop=True, inplace=True)
# Strip spaces from column names (the raw CSV headers contain leading spaces).
dataset.columns = [col.replace(' ', '') for col in dataset.columns]
# Sanitise label names: drop everything but letters and spaces, then turn
# each whitespace run into a single underscore. Raw strings avoid the
# invalid "\s" escape warning; r"\s+" also replaces the two-pass
# "[\s\s]" + "__"->"_" cleanup of the original in one correct step.
raw_labels = dataset['Label'].unique()
clean_labels = [re.sub(r"[^a-zA-Z ]+", "", l) for l in raw_labels]
clean_labels = [re.sub(r"\s+", "_", l) for l in clean_labels]
# Map every raw label to its cleaned form in a single replace call.
dataset['Label'] = dataset['Label'].replace(dict(zip(raw_labels, clean_labels)))
# Remove rows with missing values.
dataset.dropna(inplace=True)
# Cast features to float64 and turn +/-inf into NaN so they can be dropped;
# the Label column is split off first so the cast touches features only.
label_col = dataset['Label']
feature_df = dataset.loc[:, dataset.columns != 'Label'].astype('float64')
feature_df = feature_df.replace([numpy.inf, -numpy.inf], numpy.nan)
dataset = feature_df.merge(label_col, how='outer', left_index=True, right_index=True)
dataset.dropna(inplace=True)
# Separate the dataset into labels and features.
labels = dataset['Label']
features = dataset.loc[:, dataset.columns != 'Label'].astype('float64')
# Standardise features to zero mean / unit variance.
# (The original comment claimed RobustScaler; the code uses StandardScaler.)
scaler = StandardScaler()
features = scaler.fit_transform(features)
# Encode the string labels as integer class ids for
# sparse_categorical_crossentropy.
labelenc = LabelEncoder()
labels = labelenc.fit_transform(labels)
# 80/20 train/test split.
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=.2)
# Release the large intermediates before training.
dataset = None
label_col = None
features = None
labels = None
# ---- Model definition, training, evaluation and saving ----
# Input layer: 78 neurons, one per feature.
# Hidden layer: 67 neurons (~2/3 of input neurons + output neurons).
# Output layer: one softmax neuron per class. Derived from the fitted
# LabelEncoder (15 on the full dataset) instead of a hard-coded 15, so the
# model still works when a subset of the CSVs is loaded.
# Dropout of 0.2 guards against overfitting.
num_classes = len(labelenc.classes_)
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(78,)),
    tf.keras.layers.Dense(67, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])
# Adam optimiser; sparse_categorical_crossentropy matches the integer-encoded
# labels produced by LabelEncoder (no one-hot needed).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# TensorBoard callback logging training metrics/histograms to ./train_logs.
log_dir = os.path.join("train_logs")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Stop training once the loss has not improved for 10 epochs, restoring the
# best weights observed. (Also fixes the original "eary_stop" typo.)
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
# Train for up to 100 epochs (early stopping may end sooner).
model.fit(features_train, labels_train, epochs=100, callbacks=[tensorboard_callback, early_stop_callback])
# Report held-out accuracy.
model.evaluate(features_test, labels_test, verbose=2)
# Save the model with a timestamped filename; create the target directory
# first, since the HDF5 writer fails if it does not exist.
os.makedirs('saved_models', exist_ok=True)
model.save('saved_models/IDS_model_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + '.h5')