Classification with reduced training data

[1]:
# import libraries
import os
import csv
import time
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.metrics import accuracy_score

import xgboost as xgb
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn import metrics

from noisecut.model.noisecut_model import NoiseCut
from noisecut.model.noisecut_coder import Metric
from noisecut.tree_structured.data_manipulator import DataManipulator
from noisecut.tree_structured.sample_generator import SampleGenerator


# Directory that holds the dataset files (update this to your actual path)
input_file_path = "../data/"

# Dataset names follow the pattern "<D>D_E<k>" with D = 8..12 and k = 1..6,
# ordered by D first and by k second
dataset_names = [
    f"{n_dims}D_E{example}"
    for n_dims in range(8, 13)
    for example in range(1, 7)
]

NoiseCut

[2]:
# Set training size and noise intensity (both are passed on as percentages)
Training_set_size = 30
Noise_intensity = 5

# ROC data per dataset, keyed "dataset_<index>". The key does not contain the
# repetition number, so every repetition overwrites the previous entry and only
# the last run of each dataset remains in this dictionary.
results_NC = {}

# Resampled FPR and TPR values of every run (all datasets, all repetitions)
all_fprs_NC = []
all_tprs_NC = []

max_length = 100  # number of points of the common grid used for resampling
fpr_grid_NC = np.linspace(0, 1, max_length)

# Loop through each file; `i` is the 1-based dataset index used in the keys
st = time.time()  # start timer
for i, dataset_name in enumerate(dataset_names, start=1):
    print(f"Dataset: {dataset_name}")
    file_path = os.path.join(input_file_path, dataset_name)

    # The first line of the file holds the structure of the data, which is
    # handed to NoiseCut as 'n_input_each_box'. It does not change between
    # repetitions, so it is read once per dataset instead of once per run.
    with open(file_path, newline="") as csvfile:
        first_row = next(csv.reader(csvfile, delimiter=" "))
    # Convert the non-empty string values to integers
    data_structure = [int(value) for value in first_row if value.strip() != ""]

    # The remaining lines are the samples; the last column is the label
    data = pd.read_csv(
        file_path, delimiter="    ", header=None, skiprows=1, engine="python"
    )
    X = data.iloc[:, :-1]
    Y = data.iloc[:, -1]

    # Repeat the experiment five times per dataset
    for j in range(5):
        # Add noise in data labeling. Then, train and test set split.
        manipulator = DataManipulator()
        x_noisy, y_noisy = manipulator.get_noisy_data(
            X, Y, percentage_noise=Noise_intensity
        )
        x_train, y_train, x_test, y_test = manipulator.split_data(
            x_noisy, y_noisy, percentage_training_data=Training_set_size
        )

        # Fitting the hybrid model ('n_input_each_box' must fit the data)
        mdl = NoiseCut(n_input_each_box=data_structure)
        mdl.fit(x_train, y_train)

        # ROC curve from the predicted probability of class 1
        y_pred_proba = mdl.predict_probability_of_being_1(x_test)
        fpr, tpr, thresholds = metrics.roc_curve(y_test.astype(int), y_pred_proba)

        # Resample FPR and TPR onto the common grid so that curves of
        # different lengths can be averaged point by point
        fpr_interp = np.interp(fpr_grid_NC, fpr, fpr)
        tpr_interp = np.interp(fpr_grid_NC, fpr, tpr)

        # Store the values in the dictionary (last repetition wins)
        results_NC[f"dataset_{i}"] = {
            "fpr": fpr_interp,
            "tpr": tpr_interp,
            "thresholds": thresholds,
        }

        # Append FPR and TPR values to the lists
        all_fprs_NC.append(fpr_interp)
        all_tprs_NC.append(tpr_interp)

run_time = time.time() - st
print("Runtime(seconds)=", run_time)

# Point-wise average of FPR and TPR over all runs
avg_fpr_NC = np.mean(all_fprs_NC, axis=0)
avg_tpr_NC = np.mean(all_tprs_NC, axis=0)

# Sort the points by false positive rate, as required by metrics.auc
sorted_indices_NC = sorted(range(len(avg_fpr_NC)), key=lambda k: avg_fpr_NC[k])
avg_fpr_NC_sorted = [avg_fpr_NC[k] for k in sorted_indices_NC]
avg_tpr_NC_sorted = [avg_tpr_NC[k] for k in sorted_indices_NC]

# Calculate overall AUC-ROC
overall_auc_roc_NC = metrics.auc(avg_fpr_NC_sorted, avg_tpr_NC_sorted)
print("Overall AUC-ROC (NoiseCut)=", overall_auc_roc_NC)
Dataset: 8D_E1
Dataset: 8D_E2
Dataset: 8D_E3
Dataset: 8D_E4
Dataset: 8D_E5
Dataset: 8D_E6
Dataset: 9D_E1
Dataset: 9D_E2
Dataset: 9D_E3
Dataset: 9D_E4
Dataset: 9D_E5
Dataset: 9D_E6
Dataset: 10D_E1
Dataset: 10D_E2
Dataset: 10D_E3
Dataset: 10D_E4
Dataset: 10D_E5
Dataset: 10D_E6
Dataset: 11D_E1
Dataset: 11D_E2
Dataset: 11D_E3
Dataset: 11D_E4
Dataset: 11D_E5
Dataset: 11D_E6
Dataset: 12D_E1
Dataset: 12D_E2
Dataset: 12D_E3
Dataset: 12D_E4
Dataset: 12D_E5
Dataset: 12D_E6
Runtime(seconds)= 50.06166934967041
Overall AUC-ROC (NoiseCut)= 0.9107549929975035

XGBoost

[3]:
#####################
### XGBoost Model ###
#####################
def set_best_params_xgb(x_train, y_train, x_test, y_test):
    """Grid-search XGBoost hyperparameters and return the best combination.

    The search scores every combination by accuracy with a shuffled,
    stratified 5-fold cross-validation on (x_train, y_train).

    Parameters
    ----------
    x_train, y_train : training samples and labels used for the search.
    x_test, y_test : samples and labels forwarded to every fit as ``eval_set``,
        i.e. the data that early stopping monitors.

    Returns
    -------
    The ``best_params_`` attribute of the fitted grid search.

    NOTE(review): the caller in this notebook passes its test split as
    x_test / y_test, so early stopping is driven by test data during tuning.
    Confirm that this is intended.
    """
    search_space = [
        dict(
            learning_rate=[0.01, 0.1],
            gamma=[0.4, 0.8],
            max_depth=[6, 8],
            n_estimators=[200, 400],
            early_stopping_rounds=[10, 20],
        )
    ]

    # Accuracy is the selection criterion; the folds are shuffled and stratified
    folds = StratifiedKFold(n_splits=5, shuffle=True)
    searcher = GridSearchCV(
        estimator=xgb.XGBClassifier(),
        param_grid=search_space,
        scoring="accuracy",
        cv=folds,
        verbose=0,
    )

    # The extra keyword arguments are handed on to the estimator's fit
    early_stopping_data = [(x_test, y_test)]
    searcher.fit(x_train, y_train, eval_set=early_stopping_data, verbose=0)

    return searcher.best_params_

def xgb_model(x_train, y_train, x_test, y_test, best_param):
    """Train an XGBoost classifier with tuned settings and predict on x_test.

    Parameters
    ----------
    x_train, y_train : training samples and labels.
    x_test, y_test : samples and labels used both as the early-stopping
        ``eval_set`` during fitting and as the data to predict on.
    best_param : mapping that provides "learning_rate", "gamma", "max_depth",
        "n_estimators" and "early_stopping_rounds".

    Returns
    -------
    Tuple ``(predict_proba(x_test), predict(x_test))``.

    NOTE(review): the early-stopping patience is whatever ``best_param``
    contains (not a fixed 10 rounds), and the set monitored for early stopping
    is the same set the predictions are made on. Confirm this is intended.
    """
    tuned_names = (
        "learning_rate",
        "gamma",
        "max_depth",
        "n_estimators",
        "early_stopping_rounds",
    )
    # Only the tuned settings are taken from best_param
    tuned = {name: best_param[name] for name in tuned_names}
    booster = xgb.XGBClassifier(objective="binary:logistic", **tuned)

    # Fit with early stopping monitored on (x_test, y_test)
    booster = booster.fit(
        x_train, y_train, eval_set=[(x_test, y_test)], verbose=0
    )

    # Predicted probabilities first, predicted labels second
    probabilities = booster.predict_proba(x_test)
    labels = booster.predict(x_test)
    return probabilities, labels


##############################
### XGBoost Implementation ###
##############################
# Experiment settings, both passed on as percentages
Training_set_size = 30
Noise_intensity = 5

# ROC data per dataset, keyed "dataset_<index>"; a later repetition replaces
# the entry written by an earlier one
results_XGB = {}

# Resampled TPR and FPR values of every run (all datasets, all repetitions)
all_tprs_xgb = []
all_fprs_xgb = []

# Number of points used when resampling the ROC curves
max_length_xgb = 100

st = time.time()  # start timer
# `i` is the 1-based dataset index used in the result keys
for i, dataset_name in enumerate(dataset_names, start=1):
    print(f"Dataset: {dataset_name}")
    file_path = os.path.join(input_file_path, dataset_name)

    # Load the samples (the first line of the file is skipped)
    data = pd.read_csv(
        file_path, delimiter="    ", header=None, skiprows=1, engine="python"
    )
    # Last column is the label, everything before it is a feature
    X, Y = data.iloc[:, :-1], data.iloc[:, -1]

    # Hyperparameters are tuned once per dataset on its own noisy split
    manipulator = DataManipulator()
    x_noisy, y_noisy = manipulator.get_noisy_data(
        X, Y, percentage_noise=Noise_intensity
    )
    x_train, y_train, x_test, y_test = manipulator.split_data(
        x_noisy, y_noisy, percentage_training_data=Training_set_size
    )
    best_params_xgb = set_best_params_xgb(x_train, y_train, x_test, y_test)

    # Repeat the experiment five times with the tuned hyperparameters
    for j in range(5):
        # Add label noise, then split into train and test sets
        manipulator = DataManipulator()
        x_noisy, y_noisy = manipulator.get_noisy_data(
            X, Y, percentage_noise=Noise_intensity
        )
        x_train, y_train, x_test, y_test = manipulator.split_data(
            x_noisy, y_noisy, percentage_training_data=Training_set_size
        )

        # Train and predict on the test split
        y_pred_proba, y_pred = xgb_model(
            x_train, y_train, x_test, y_test, best_params_xgb
        )

        # ROC curve from the second probability column
        positive_scores = y_pred_proba[:, 1]
        fpr, tpr, thresholds = metrics.roc_curve(y_test, positive_scores)

        # Keep the raw curve of this run under the dataset key
        results_XGB[f"dataset_{i}"] = dict(
            fpr=fpr,
            tpr=tpr,
            thresholds=thresholds,
        )

        # Resample onto a common grid so that all curves have the same length
        fpr_grid_xgb = np.linspace(0, 1, max_length_xgb)
        tpr_interp_xgb = np.interp(fpr_grid_xgb, fpr, tpr)
        fpr_interp_xgb = np.interp(fpr_grid_xgb, fpr, fpr)

        all_tprs_xgb.append(tpr_interp_xgb)
        all_fprs_xgb.append(fpr_interp_xgb)

run_time = time.time() - st
print("Runtime(seconds)=", run_time)

# Point-wise average of TPR and FPR over all runs
avg_tpr_xgb = np.mean(all_tprs_xgb, axis=0)
avg_fpr_xgb = np.mean(all_fprs_xgb, axis=0)

# Order the points by false positive rate for the overall AUC-ROC
sorted_indices_xgb = sorted(
    range(len(avg_fpr_xgb)), key=avg_fpr_xgb.__getitem__
)
avg_fpr_xgb_sorted = [avg_fpr_xgb[k] for k in sorted_indices_xgb]
avg_tpr_xgb_sorted = [avg_tpr_xgb[k] for k in sorted_indices_xgb]

# Overall AUC-ROC of the averaged curve
overall_auc_roc_xgb = metrics.auc(avg_fpr_xgb_sorted, avg_tpr_xgb_sorted)
print("Overall AUC-ROC (XGBoost)=", overall_auc_roc_xgb)
Dataset: 8D_E1
Dataset: 8D_E2
Dataset: 8D_E3
Dataset: 8D_E4
Dataset: 8D_E5
Dataset: 8D_E6
Dataset: 9D_E1
Dataset: 9D_E2
Dataset: 9D_E3
Dataset: 9D_E4
Dataset: 9D_E5
Dataset: 9D_E6
Dataset: 10D_E1
Dataset: 10D_E2
Dataset: 10D_E3
Dataset: 10D_E4
Dataset: 10D_E5
Dataset: 10D_E6
Dataset: 11D_E1
Dataset: 11D_E2
Dataset: 11D_E3
Dataset: 11D_E4
Dataset: 11D_E5
Dataset: 11D_E6
Dataset: 12D_E1
Dataset: 12D_E2
Dataset: 12D_E3
Dataset: 12D_E4
Dataset: 12D_E5
Dataset: 12D_E6
Runtime(seconds)= 550.8305244445801
Overall AUC-ROC (XGBoost)= 0.8221711490267819

SVM

[4]:
#################
### SVM Model ###
#################
# Define the SVM parameter tuning function and the model
def set_best_params_svm(x_train, y_train, x_val, y_val):
    """Grid-search SVC hyperparameters and return the best combination.

    Every combination of C, kernel and gamma is scored by accuracy with a
    shuffled, stratified 5-fold cross-validation on (x_train, y_train).

    Parameters
    ----------
    x_train, y_train : training samples and labels used for the search.
    x_val, y_val : accepted for a uniform signature; not used by the search.

    Returns
    -------
    The ``best_params_`` attribute of the fitted grid search.
    """
    candidate_values = {
        "C": [10, 100],
        "kernel": ["linear", "rbf"],
        "gamma": ["scale", "auto"],
    }

    # Probability estimates are enabled on the estimator being tuned
    searcher = GridSearchCV(
        estimator=SVC(probability=True),
        param_grid=[candidate_values],
        scoring="accuracy",
        cv=StratifiedKFold(n_splits=5, shuffle=True),
        verbose=0,
    )
    searcher.fit(x_train, y_train)

    return searcher.best_params_


def svm_model_with_early_stopping(
    x_train, y_train, x_val, y_val, max_no_improvement=5, best_params=None
):
    """Train an SVC with the tuned hyperparameters and return it.

    The previous implementation called ``fit`` on the same data up to
    ``max_no_improvement`` times and returned the last fit. Every ``fit``
    retrains the SVC from scratch, so the repetitions only multiplied the
    runtime, and the validation accuracy never influenced which model was
    returned. The model is therefore fitted exactly once.

    Parameters
    ----------
    x_train, y_train : training samples and labels.
    x_val, y_val : kept for backward compatibility; not used.
    max_no_improvement : kept for backward compatibility; not used.
    best_params : optional mapping with the keys "C", "kernel" and "gamma".
        When omitted, the notebook-level variable ``best_params_svm`` is used,
        which is how existing callers provide the tuned values.

    Returns
    -------
    The fitted ``SVC`` (created with ``probability=True``).

    Raises
    ------
    NameError
        If ``best_params`` is omitted and ``best_params_svm`` is not defined.
    """
    if best_params is None:
        # Backward compatibility with callers that rely on the global variable
        best_params = best_params_svm

    clf = SVC(
        C=best_params["C"],
        kernel=best_params["kernel"],
        gamma=best_params["gamma"],
        probability=True,
    )
    clf.fit(x_train, y_train)

    # Return the trained model
    return clf

##########################
### SVM Implementation ###
##########################
# Set training size and noise intensity (both are passed on as percentages)
Training_set_size = 30
Noise_intensity = 5

# ROC data per dataset, keyed "dataset_<index>"; every repetition overwrites
# the previous entry, so only the last run of each dataset is kept
results_SVM = {}

# Lists to store TPR and FPR values of every run
all_tprs_svm = []
all_fprs_svm = []

# Number of points used when resampling the ROC curves
max_length_svm = 100

# Loop through each file
i = 0  # loop counter
st = time.time()  # start timer
for dataset_name in dataset_names:
    print(f"Dataset: {dataset_name}")
    i += 1
    file_path = os.path.join(input_file_path, dataset_name)

    # Read the CSV file (the first line of the file is skipped)
    data = pd.read_csv(
        file_path, delimiter="    ", header=None, skiprows=1, engine="python"
    )
    # Split the data into X and Y
    X = data.iloc[:, :-1]
    Y = data.iloc[:, -1]

    # Parameter tuning for each dataset, on its own noisy split
    manipulator = DataManipulator()
    x_noisy, y_noisy = manipulator.get_noisy_data(
        X, Y, percentage_noise=Noise_intensity
    )
    x_train, y_train, x_val, y_val = manipulator.split_data(
        x_noisy, y_noisy, percentage_training_data=Training_set_size
    )

    # Selecting the best hyperparameters
    best_params_svm = set_best_params_svm(x_train, y_train, x_val, y_val)

    # For loop for the number of repeating the experiment
    for j in range(5):
        # Add noise in data labeling. Then, train and test set split.
        manipulator = DataManipulator()
        x_noisy, y_noisy = manipulator.get_noisy_data(
            X, Y, percentage_noise=Noise_intensity
        )
        x_train, y_train, x_test, y_test = manipulator.split_data(
            x_noisy, y_noisy, percentage_training_data=Training_set_size
        )

        # Fitting the SVM model (x_val / y_val still refer to the tuning split)
        clf = svm_model_with_early_stopping(x_train, y_train, x_val, y_val)

        # Predict on the test samples of THIS repetition. Predicting on x_val
        # (the tuning split) and scoring against y_test, as was done before,
        # pairs scores and labels that belong to different samples.
        y_pred_proba = clf.predict_proba(x_test)
        y_pred = clf.predict(x_test)

        # AUC-ROC
        fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_proba[:, 1])

        # Store the values in the dictionary (last repetition wins)
        results_SVM[f"dataset_{i}"] = {
            "fpr": fpr,
            "tpr": tpr,
            "thresholds": thresholds,
        }

        # Interpolate or resample TPR and FPR to have the same length
        tpr_interp_svm = np.interp(np.linspace(0, 1, max_length_svm), fpr, tpr)
        fpr_interp_svm = np.interp(np.linspace(0, 1, max_length_svm), fpr, fpr)

        # Append TPR and FPR values to the lists
        all_tprs_svm.append(tpr_interp_svm)
        all_fprs_svm.append(fpr_interp_svm)

run_time = time.time() - st
print("Runtime(seconds)=", run_time)

# Point-wise average of TPR and FPR over all runs
avg_tpr_svm = np.mean(all_tprs_svm, axis=0)
avg_fpr_svm = np.mean(all_fprs_svm, axis=0)


# Sort the lists based on false positive rates for overall AUC-ROC
sorted_indices_svm = sorted(range(len(avg_fpr_svm)), key=lambda k: avg_fpr_svm[k])
avg_fpr_svm_sorted = [avg_fpr_svm[i] for i in sorted_indices_svm]
avg_tpr_svm_sorted = [avg_tpr_svm[i] for i in sorted_indices_svm]

# Calculate overall AUC-ROC
overall_auc_roc_svm = metrics.auc(avg_fpr_svm_sorted, avg_tpr_svm_sorted)
print("Overall AUC-ROC (SVM)=", overall_auc_roc_svm)
Dataset: 8D_E1
Dataset: 8D_E2
Dataset: 8D_E3
Dataset: 8D_E4
Dataset: 8D_E5
Dataset: 8D_E6
Dataset: 9D_E1
Dataset: 9D_E2
Dataset: 9D_E3
Dataset: 9D_E4
Dataset: 9D_E5
Dataset: 9D_E6
Dataset: 10D_E1
Dataset: 10D_E2
Dataset: 10D_E3
Dataset: 10D_E4
Dataset: 10D_E5
Dataset: 10D_E6
Dataset: 11D_E1
Dataset: 11D_E2
Dataset: 11D_E3
Dataset: 11D_E4
Dataset: 11D_E5
Dataset: 11D_E6
Dataset: 12D_E1
Dataset: 12D_E2
Dataset: 12D_E3
Dataset: 12D_E4
Dataset: 12D_E5
Dataset: 12D_E6
Runtime(seconds)= 493.1864011287689
Overall AUC-ROC (SVM)= 0.6073220983441359

Random Forest

[5]:
###########################
### Random Forest Model ###
###########################
# Define the RandomForest parameter tuning function and the model
def set_best_params_random_forest(x_train, y_train, x_val, y_val):
    """Grid-search random-forest hyperparameters and return the best ones.

    Each combination is scored by accuracy with a shuffled, stratified 5-fold
    cross-validation on (x_train, y_train).

    Parameters
    ----------
    x_train, y_train : training samples and labels used for the search.
    x_val, y_val : accepted for a uniform signature; not used by the search.

    Returns
    -------
    The ``best_params_`` attribute of the fitted grid search.
    """
    forest = RandomForestClassifier(oob_score=True)
    splitter = StratifiedKFold(n_splits=5, shuffle=True)

    # Two candidate values per hyperparameter
    grid = [
        {
            "n_estimators": [100, 200],
            "max_depth": [None, 10],
            "min_samples_split": [2, 5],
            "min_samples_leaf": [1, 2],
        }
    ]

    search = GridSearchCV(
        estimator=forest,
        param_grid=grid,
        scoring="accuracy",
        cv=splitter,
        verbose=0,
    )
    search.fit(x_train, y_train)

    return search.best_params_


def random_forest_model_with_early_stopping(
    x_train, y_train, x_val, y_val, max_no_improvement=5, best_params=None
):
    """Train a random forest with the tuned hyperparameters and return it.

    The previous implementation called ``fit`` on the same data up to
    ``max_no_improvement`` times and returned the last fit, not the one with
    the best validation accuracy. Each ``fit`` builds a new forest from
    scratch, so the repetitions only multiplied the runtime. The model is
    therefore fitted exactly once.

    Parameters
    ----------
    x_train, y_train : training samples and labels.
    x_val, y_val : kept for backward compatibility; not used.
    max_no_improvement : kept for backward compatibility; not used.
    best_params : optional mapping with the keys "n_estimators", "max_depth",
        "min_samples_split" and "min_samples_leaf". When omitted, the
        notebook-level variable ``best_params_rf`` is used, which is how
        existing callers provide the tuned values.

    Returns
    -------
    The fitted ``RandomForestClassifier`` (created with ``oob_score=True``).

    Raises
    ------
    NameError
        If ``best_params`` is omitted and ``best_params_rf`` is not defined.
    """
    if best_params is None:
        # Backward compatibility with callers that rely on the global variable
        best_params = best_params_rf

    clf = RandomForestClassifier(
        n_estimators=best_params["n_estimators"],
        max_depth=best_params["max_depth"],
        min_samples_split=best_params["min_samples_split"],
        min_samples_leaf=best_params["min_samples_leaf"],
        oob_score=True,
    )
    clf.fit(x_train, y_train)

    # Return the trained model
    return clf

####################################
### Random Forest Implementation ###
####################################
# Set training size and noise intensity (both are passed on as percentages)
Training_set_size = 30
Noise_intensity = 5

# ROC data per dataset, keyed "dataset_<index>"; every repetition overwrites
# the previous entry, so only the last run of each dataset is kept
results_RF = {}

# Lists to store TPR and FPR values of every run
all_tprs_rf = []
all_fprs_rf = []

# Number of points used when resampling the ROC curves
max_length_rf = 100

# Loop through each file
i = 0  # loop counter
st = time.time()  # start timer
for dataset_name in dataset_names:
    print(f"Dataset: {dataset_name}")
    i += 1
    file_path = os.path.join(input_file_path, dataset_name)

    # Read the CSV file (the first line of the file is skipped)
    data = pd.read_csv(
        file_path, delimiter="    ", header=None, skiprows=1, engine="python"
    )
    # Split the data into X and Y
    X = data.iloc[:, :-1]
    Y = data.iloc[:, -1]

    # Parameter tuning for each dataset, on its own noisy split
    manipulator = DataManipulator()
    x_noisy, y_noisy = manipulator.get_noisy_data(
        X, Y, percentage_noise=Noise_intensity
    )
    x_train, y_train, x_val, y_val = manipulator.split_data(
        x_noisy, y_noisy, percentage_training_data=Training_set_size
    )

    # Selecting the best hyperparameters
    best_params_rf = set_best_params_random_forest(x_train, y_train, x_val, y_val)

    # For loop for the number of repeating the experiment
    for j in range(5):
        # Add noise in data labeling. Then, train and test set split.
        manipulator = DataManipulator()
        x_noisy, y_noisy = manipulator.get_noisy_data(
            X, Y, percentage_noise=Noise_intensity
        )
        x_train, y_train, x_test, y_test = manipulator.split_data(
            x_noisy, y_noisy, percentage_training_data=Training_set_size
        )

        # Fitting the RandomForest model (x_val / y_val still refer to the
        # tuning split)
        clf = random_forest_model_with_early_stopping(x_train, y_train, x_val, y_val)

        # Predict on the test samples of THIS repetition. Predicting on x_val
        # (the tuning split) and scoring against y_test, as was done before,
        # pairs scores and labels that belong to different samples.
        y_pred_proba = clf.predict_proba(x_test)
        y_pred = clf.predict(x_test)

        # Evaluation metrics on the same test split
        accuracy, recall, precision, F1 = Metric.set_confusion_matrix(
            y_test, y_pred
        )

        # AUC-ROC (computed once; the earlier duplicate call was overwritten)
        fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_proba[:, 1])

        # Store the values in the dictionary (last repetition wins)
        results_RF[f"dataset_{i}"] = {
            "fpr": fpr,
            "tpr": tpr,
            "thresholds": thresholds,
        }

        # Interpolate or resample TPR and FPR to have the same length
        tpr_interp_rf = np.interp(np.linspace(0, 1, max_length_rf), fpr, tpr)
        fpr_interp_rf = np.interp(np.linspace(0, 1, max_length_rf), fpr, fpr)

        # Append TPR and FPR values to the lists
        all_tprs_rf.append(tpr_interp_rf)
        all_fprs_rf.append(fpr_interp_rf)

run_time = time.time() - st
print("Runtime(seconds)=", run_time)

# Point-wise average of TPR and FPR over all runs
avg_tpr_rf = np.mean(all_tprs_rf, axis=0)
avg_fpr_rf = np.mean(all_fprs_rf, axis=0)


# Sort the lists based on false positive rates for overall AUC-ROC
sorted_indices_rf = sorted(range(len(avg_fpr_rf)), key=lambda k: avg_fpr_rf[k])
avg_fpr_rf_sorted = [avg_fpr_rf[i] for i in sorted_indices_rf]
avg_tpr_rf_sorted = [avg_tpr_rf[i] for i in sorted_indices_rf]

# Calculate overall AUC-ROC
overall_auc_roc_rf = metrics.auc(avg_fpr_rf_sorted, avg_tpr_rf_sorted)
print("Overall AUC-ROC (Random Forest)=", overall_auc_roc_rf)
Dataset: 8D_E1
Dataset: 8D_E2
Dataset: 8D_E3
Dataset: 8D_E4
Dataset: 8D_E5
Dataset: 8D_E6
Dataset: 9D_E1
Dataset: 9D_E2
Dataset: 9D_E3
Dataset: 9D_E4
Dataset: 9D_E5
Dataset: 9D_E6
Dataset: 10D_E1
Dataset: 10D_E2
Dataset: 10D_E3
Dataset: 10D_E4
Dataset: 10D_E5
Dataset: 10D_E6
Dataset: 11D_E1
Dataset: 11D_E2
Dataset: 11D_E3
Dataset: 11D_E4
Dataset: 11D_E5
Dataset: 11D_E6
Dataset: 12D_E1
Dataset: 12D_E2
Dataset: 12D_E3
Dataset: 12D_E4
Dataset: 12D_E5
Dataset: 12D_E6
Runtime(seconds)= 645.1964688301086
Overall AUC-ROC (Random Forest)= 0.6020575977444443

Deep Neural Network

[6]:
# ---------------------------------------------------------------------------------------------
# Note:
# The deep neural network computations are not included in this notebook, so that TensorFlow
# and its dependencies do not have to be installed.
# The code below is commented out and is provided for illustration only.
# ---------------------------------------------------------------------------------------------
# # import Tensorflow related libraries
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
# from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# from tensorflow.keras.callbacks import EarlyStopping
# from sklearn.model_selection import train_test_split


# # Define the Neural Network function with variable hidden layer sizes
# def create_neural_network(hidden_layer_sizes=[64]):
#     model = Sequential()
#     for size in hidden_layer_sizes:
#         model.add(Dense(size, activation='relu'))
#     model.add(Dense(1, activation='sigmoid'))
#     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#     return model

# # Neural Network implementation with hyperparameter tuning using GridSearchCV
# # a dictionary for storing the results
# results_NeuralNetwork = {}

# # Set training size and noise intensity
# Training_set_size = 30
# Noise_intensity = 5

# # a dictionary for storing the results
# results_NN = {}

# # Lists to store TPR and FPR values for each dataset
# all_tprs_nn = []
# all_fprs_nn = []

# # Choose a suitable maximum length for interpolation
# max_length_nn = 100

# # Loop through each file
# i = 0  # loop counter
# st = time.time()  # start timer
# for dataset_name in dataset_names:
#     print(f"Dataset: {dataset_name}")
#     i += 1
#     file_path = os.path.join(input_file_path, dataset_name)

#     # Read the CSV file
#     data = pd.read_csv(
#         file_path, delimiter="    ", header=None, skiprows=1, engine="python"
#     )
#     # Split the data into X and Y
#     X = data.iloc[:, :-1]
#     Y = data.iloc[:, -1]

#     # Parameter tuning for each dataset
#     manipulator = DataManipulator()
#     x_noisy, y_noisy = manipulator.get_noisy_data(
#         X, Y, percentage_noise=Noise_intensity
#     )

#     x_train, x_val, y_train, y_val = train_test_split(
#         x_noisy, y_noisy, test_size=0.7, random_state=42
#     )

#     # Define the parameter grid for hyperparameter tuning
#     param_grid = {
#         'hidden_layer_sizes': [(128,), (256,), (512,),
#                                (256, 128), (512, 256),
#                                (256, 128, 64), (512, 256, 128)],
#         'epochs': [50, 100, 200],
#     }

#     # Build and train the Neural Network with hyperparameter tuning
#     model = KerasClassifier(build_fn=create_neural_network, verbose=0)
#     early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
#     grid_search = GridSearchCV(
#         estimator=model,
#         param_grid=param_grid,
#         scoring='accuracy',
#         cv=StratifiedKFold(n_splits=5, shuffle=True),
#         verbose=0
#     )
#     grid_result = grid_search.fit(x_train, y_train, validation_data=(x_val, y_val), callbacks=[early_stopping])

#     # Get the best parameters from the grid search
#     best_params = grid_result.best_params_


#     # For loop for the number of repeating the experiment
#     for j in range(5):
#         # Add noise in data labeling. Then, train and test set split.
#         manipulator = DataManipulator()
#         x_noisy, y_noisy = manipulator.get_noisy_data(
#             X, Y, percentage_noise=Noise_intensity
#         )
#         x_train, x_val, y_train, y_val = train_test_split(
#             x_noisy, y_noisy, test_size=0.7, random_state=42
#         )

#         # Build the final model with the best parameters
#         final_model = create_neural_network(hidden_layer_sizes=best_params['hidden_layer_sizes'])
#         final_model.fit(x_train, y_train, epochs=best_params['epochs'], validation_data=(x_val, y_val), verbose=0, callbacks=[early_stopping])

#         # make predictions for validation data
#         y_pred_proba = final_model.predict(x_val)
#         y_pred = (y_pred_proba > 0.5).astype(int)

#         # Evaluation metrics
#         accuracy, recall, precision, F1 = Metric.set_confusion_matrix(
#             y_val, y_pred
#         )
#         fpr, tpr, thresholds = metrics.roc_curve(
#             y_val.astype(int), y_pred_proba
#         )


#         # Store the values in the dictionary
#         results_NN[f"dataset_{i}"] = {
#             "fpr": fpr,
#             "tpr": tpr,
#             "thresholds": thresholds,
#         }

#         # Interpolate or resample TPR and FPR to have the same length
#         tpr_interp_nn = np.interp(np.linspace(0, 1, max_length_nn), fpr, tpr)
#         fpr_interp_nn = np.interp(np.linspace(0, 1, max_length_nn), fpr, fpr)

#         # Append TPR and FPR values to the lists
#         all_tprs_nn.append(tpr_interp_nn)
#         all_fprs_nn.append(fpr_interp_nn)

# run_time = time.time() - st
# print("Runtime(seconds)=", run_time)

# # Calculate average TPR and FPR across all datasets
# avg_tpr_nn = np.mean(all_tprs_nn, axis=0)
# avg_fpr_nn = np.mean(all_fprs_nn, axis=0)


# # Sort the lists based on false positive rates for overall AUC-ROC
# sorted_indices_nn = sorted(range(len(avg_fpr_nn)), key=lambda k: avg_fpr_nn[k])
# avg_fpr_nn_sorted = [avg_fpr_nn[i] for i in sorted_indices_nn]
# avg_tpr_nn_sorted = [avg_tpr_nn[i] for i in sorted_indices_nn]

# # Calculate overall AUC-ROC
# overall_auc_roc_nn = metrics.auc(avg_fpr_nn_sorted, avg_tpr_nn_sorted)
# print("Overall AUC-ROC (Deep Neural Network)=", overall_auc_roc_nn)
2024-02-08 16:45:31.414645: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Dataset: 8D_E1
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2024-02-08 16:45:32.733028: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
1/1 [==============================] - 0s 140ms/step
1/1 [==============================] - 0s 43ms/step
1/1 [==============================] - 0s 58ms/step
1/1 [==============================] - 0s 42ms/step
WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fb67567ec20> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
1/1 [==============================] - 0s 82ms/step
WARNING:tensorflow:6 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fb6752da560> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
6/6 [==============================] - 0s 10ms/step
Dataset: 8D_E2
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 42ms/step
1/1 [==============================] - 0s 135ms/step
1/1 [==============================] - 0s 63ms/step
1/1 [==============================] - 0s 64ms/step
1/1 [==============================] - 0s 121ms/step
6/6 [==============================] - 0s 6ms/step
Dataset: 8D_E3
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 46ms/step
1/1 [==============================] - 0s 134ms/step
1/1 [==============================] - 0s 42ms/step
1/1 [==============================] - 0s 55ms/step
1/1 [==============================] - 0s 82ms/step
6/6 [==============================] - 0s 3ms/step
Dataset: 8D_E4
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 65ms/step
1/1 [==============================] - 0s 99ms/step
1/1 [==============================] - 0s 78ms/step
1/1 [==============================] - 0s 71ms/step
1/1 [==============================] - 0s 172ms/step
6/6 [==============================] - 0s 11ms/step
Dataset: 8D_E5
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 112ms/step
1/1 [==============================] - 0s 74ms/step
1/1 [==============================] - 0s 129ms/step
1/1 [==============================] - 0s 41ms/step
1/1 [==============================] - 0s 42ms/step
6/6 [==============================] - 0s 6ms/step
Dataset: 8D_E6
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 51ms/step
1/1 [==============================] - 0s 82ms/step
1/1 [==============================] - 0s 41ms/step
1/1 [==============================] - 0s 113ms/step
1/1 [==============================] - 0s 115ms/step
6/6 [==============================] - 0s 2ms/step
Dataset: 9D_E1
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 88ms/step
1/1 [==============================] - 0s 51ms/step
1/1 [==============================] - 0s 85ms/step
1/1 [==============================] - 0s 41ms/step
1/1 [==============================] - 0s 43ms/step
12/12 [==============================] - 0s 2ms/step
Dataset: 9D_E2
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 45ms/step
1/1 [==============================] - 0s 95ms/step
1/1 [==============================] - 0s 111ms/step
1/1 [==============================] - 0s 124ms/step
1/1 [==============================] - 0s 94ms/step
12/12 [==============================] - 0s 2ms/step
Dataset: 9D_E3
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 44ms/step
1/1 [==============================] - 0s 43ms/step
1/1 [==============================] - 0s 42ms/step
1/1 [==============================] - 0s 77ms/step
1/1 [==============================] - 0s 128ms/step
12/12 [==============================] - 0s 2ms/step
Dataset: 9D_E4
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 119ms/step
1/1 [==============================] - 0s 72ms/step
1/1 [==============================] - 0s 62ms/step
1/1 [==============================] - 0s 43ms/step
1/1 [==============================] - 0s 132ms/step
12/12 [==============================] - 0s 5ms/step
Dataset: 9D_E5
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 91ms/step
1/1 [==============================] - 0s 101ms/step
1/1 [==============================] - 0s 60ms/step
1/1 [==============================] - 0s 48ms/step
1/1 [==============================] - 0s 129ms/step
12/12 [==============================] - 0s 6ms/step
Dataset: 9D_E6
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
1/1 [==============================] - 0s 65ms/step
1/1 [==============================] - 0s 58ms/step
1/1 [==============================] - 0s 56ms/step
1/1 [==============================] - 0s 69ms/step
1/1 [==============================] - 0s 143ms/step
12/12 [==============================] - 0s 1ms/step
Dataset: 10D_E1
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2/2 [==============================] - 0s 25ms/step
2/2 [==============================] - 0s 10ms/step
2/2 [==============================] - 0s 12ms/step
2/2 [==============================] - 0s 17ms/step
2/2 [==============================] - 0s 7ms/step
23/23 [==============================] - 0s 4ms/step
Dataset: 10D_E2
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 23ms/step
2/2 [==============================] - 0s 12ms/step
23/23 [==============================] - 0s 3ms/step
Dataset: 10D_E3
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2/2 [==============================] - 0s 18ms/step
2/2 [==============================] - 0s 13ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 9ms/step
2/2 [==============================] - 0s 9ms/step
23/23 [==============================] - 0s 2ms/step
Dataset: 10D_E4
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2/2 [==============================] - 0s 12ms/step
2/2 [==============================] - 0s 13ms/step
2/2 [==============================] - 0s 12ms/step
2/2 [==============================] - 0s 13ms/step
2/2 [==============================] - 0s 12ms/step
23/23 [==============================] - 0s 4ms/step
Dataset: 10D_E5
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2/2 [==============================] - 0s 12ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 12ms/step
23/23 [==============================] - 0s 3ms/step
Dataset: 10D_E6
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
2/2 [==============================] - 0s 9ms/step
2/2 [==============================] - 0s 13ms/step
2/2 [==============================] - 0s 11ms/step
2/2 [==============================] - 0s 9ms/step
2/2 [==============================] - 0s 10ms/step
23/23 [==============================] - 0s 4ms/step
Dataset: 11D_E1
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
4/4 [==============================] - 0s 12ms/step
4/4 [==============================] - 0s 5ms/step
4/4 [==============================] - 0s 7ms/step
4/4 [==============================] - 0s 9ms/step
4/4 [==============================] - 0s 9ms/step
45/45 [==============================] - 0s 3ms/step
Dataset: 11D_E2
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
4/4 [==============================] - 0s 8ms/step
4/4 [==============================] - 0s 7ms/step
4/4 [==============================] - 0s 5ms/step
4/4 [==============================] - 0s 7ms/step
4/4 [==============================] - 0s 10ms/step
45/45 [==============================] - 0s 2ms/step
Dataset: 11D_E3
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
4/4 [==============================] - 0s 13ms/step
4/4 [==============================] - 0s 8ms/step
4/4 [==============================] - 0s 4ms/step
4/4 [==============================] - 0s 12ms/step
4/4 [==============================] - 0s 16ms/step
45/45 [==============================] - 0s 6ms/step
Dataset: 11D_E4
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
4/4 [==============================] - 0s 10ms/step
4/4 [==============================] - 0s 5ms/step
4/4 [==============================] - 0s 3ms/step
4/4 [==============================] - 0s 2ms/step
4/4 [==============================] - 0s 3ms/step
45/45 [==============================] - 0s 3ms/step
Dataset: 11D_E5
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
4/4 [==============================] - 0s 4ms/step
4/4 [==============================] - 0s 11ms/step
4/4 [==============================] - 0s 7ms/step
4/4 [==============================] - 0s 2ms/step
4/4 [==============================] - 0s 4ms/step
45/45 [==============================] - 0s 4ms/step
Dataset: 11D_E6
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
4/4 [==============================] - 0s 5ms/step
4/4 [==============================] - 0s 4ms/step
4/4 [==============================] - 0s 9ms/step
4/4 [==============================] - 0s 10ms/step
4/4 [==============================] - 0s 13ms/step
45/45 [==============================] - 0s 2ms/step
Dataset: 12D_E1
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 7ms/step
8/8 [==============================] - 0s 7ms/step
8/8 [==============================] - 0s 4ms/step
90/90 [==============================] - 0s 2ms/step
Dataset: 12D_E2
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
8/8 [==============================] - 0s 7ms/step
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 6ms/step
8/8 [==============================] - 0s 2ms/step
8/8 [==============================] - 0s 3ms/step
90/90 [==============================] - 0s 2ms/step
Dataset: 12D_E3
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
8/8 [==============================] - 0s 2ms/step
8/8 [==============================] - 0s 11ms/step
8/8 [==============================] - 0s 2ms/step
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 5ms/step
90/90 [==============================] - 0s 2ms/step
Dataset: 12D_E4
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
8/8 [==============================] - 0s 6ms/step
8/8 [==============================] - 0s 6ms/step
8/8 [==============================] - 0s 4ms/step
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 5ms/step
90/90 [==============================] - 0s 2ms/step
Dataset: 12D_E5
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 4ms/step
8/8 [==============================] - 0s 2ms/step
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 6ms/step
90/90 [==============================] - 0s 1ms/step
Dataset: 12D_E6
/tmp/ipykernel_9307/2486336337.py:81: DeprecationWarning: KerasClassifier is deprecated, use Sci-Keras (https://github.com/adriangb/scikeras) instead. See https://www.adriangb.com/scikeras/stable/migration.html for help migrating.
  model = KerasClassifier(build_fn=create_neural_network, verbose=0)
8/8 [==============================] - 0s 5ms/step
8/8 [==============================] - 0s 10ms/step
8/8 [==============================] - 0s 6ms/step
8/8 [==============================] - 0s 6ms/step
8/8 [==============================] - 0s 3ms/step
90/90 [==============================] - 0s 2ms/step
Runtime(seconds)= 1180.890480041504
Overall AUC-ROC (Random Forest)= 0.7892759939962393

Plots

[11]:
# Compare the averaged ROC curves of all classifiers on a single square axes.
fig, ax = plt.subplots(figsize=(6, 6))

# Font sizes are applied to the title/axis-label Text objects up front; the
# actual text is filled in further down via set_title / set_xlabel / set_ylabel.
ax.title.set_fontsize(16)
ax.xaxis.label.set_fontsize(14)
ax.yaxis.label.set_fontsize(14)

# Dashed diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color="gray", lw=1, linestyle="--")

# One entry per classifier:
#   (legend name, FPR to draw, TPR to draw, FPR for AUC, TPR for AUC)
# The curve is drawn from the averaged arrays, while the AUC in the legend is
# computed from the *_sorted variants (presumably because metrics.auc needs a
# monotonic x axis -- confirm against the cells that build these arrays).
# The list order fixes both the legend order and the default colour cycle.
roc_curves = [
    ("NoiseCut", avg_fpr_NC, avg_tpr_NC, avg_fpr_NC_sorted, avg_tpr_NC_sorted),
    ("DNN", avg_fpr_nn, avg_tpr_nn, avg_fpr_nn_sorted, avg_tpr_nn_sorted),
    ("XGBoost", avg_fpr_xgb, avg_tpr_xgb, avg_fpr_xgb_sorted, avg_tpr_xgb_sorted),
    ("SVM", avg_fpr_svm, avg_tpr_svm, avg_fpr_svm_sorted, avg_tpr_svm_sorted),
    ("Random Forest", avg_fpr_rf, avg_tpr_rf, avg_fpr_rf_sorted, avg_tpr_rf_sorted),
]

for model_name, fpr, tpr, fpr_sorted, tpr_sorted in roc_curves:
    ax.plot(
        fpr,
        tpr,
        "-",
        label="{} (AUC = {:.2f})".format(
            model_name, metrics.auc(fpr_sorted, tpr_sorted)
        ),
        linewidth=2.5,
    )

# Set title, labels, and legend
ax.set_title("Comparing ROC curves")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.legend(loc="lower right")

plt.tight_layout()

# Save the figure BEFORE showing it: on some backends plt.show() blocks and
# releases the figure once it returns, which would make a later save unreliable.
fig.savefig("./multiple_roc_curve_plain.png", dpi=300)

# Show the plot
plt.show()
../_images/notebooks_Classification_with_reduced_training_data_13_0.png