From d97785f646bc7b0e8dc6fa6a1af3496e94a163d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Sun, 30 Jul 2017 12:50:26 +0200 Subject: [PATCH] replace softmax by sigmoid in final layer, also adjust dataset for that --- dataset.py | 3 +-- main.py | 15 +++++++-------- models/pauls_networks.py | 9 ++++----- models/renes_networks.py | 12 ++++-------- 4 files changed, 16 insertions(+), 23 deletions(-) diff --git a/dataset.py b/dataset.py index ec6a0d4..4369ac3 100644 --- a/dataset.py +++ b/dataset.py @@ -6,7 +6,6 @@ from multiprocessing import Pool import h5py import numpy as np import pandas as pd -from keras.utils import np_utils from tqdm import tqdm logger = logging.getLogger('logger') @@ -119,7 +118,7 @@ def create_dataset_from_flows(user_flow_df, char_dict, max_len, window_size=10): client_tr[:pos_idx.shape[-1]] = 1.0 server_tr = server_tr[idx] - client_tr = np_utils.to_categorical(client_tr, 2) + # client_tr = np_utils.to_categorical(client_tr, 2) return domain_tr, flow_tr, client_tr, server_tr diff --git a/main.py b/main.py index 278b1ff..ae14564 100644 --- a/main.py +++ b/main.py @@ -7,7 +7,6 @@ import pandas as pd import tensorflow as tf from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping from keras.models import load_model -from keras.utils import np_utils from sklearn.utils import class_weight import arguments @@ -98,8 +97,8 @@ def main_hyperband(): def get_custom_class_weights(client_tr, server_tr): - client = client_tr.value.argmax(1) if type(client_tr) != np.ndarray else client_tr.argmax(1) - server = server_tr.value.argmax(1) if type(server_tr) != np.ndarray else server_tr.argmax(1) + client = client_tr.value if type(client_tr) != np.ndarray else client_tr + server = server_tr.value if type(server_tr) != np.ndarray else server_tr client_class_weight = class_weight.compute_class_weight('balanced', np.unique(client), client) server_class_weight = class_weight.compute_class_weight('balanced', np.unique(server), server) return { @@ -157,10 +156,10 @@ def main_train(param=None): logger.info("compile model") custom_metrics = models.get_metric_functions() model.compile(optimizer='adam', - loss='categorical_crossentropy', + loss='binary_crossentropy', metrics=['accuracy'] + custom_metrics) - server_tr = np_utils.to_categorical(np.max(server_windows_tr, axis=1), 2) + server_tr = np.max(server_windows_tr, axis=1) if args.class_weights: logger.info("class weights: compute custom weights") @@ -261,10 +260,10 @@ def main_new_model(): logger.info("compile model") custom_metrics = models.get_metric_functions() model.compile(optimizer='adam', - loss='categorical_crossentropy', + loss='binary_crossentropy', metrics=['accuracy'] + custom_metrics) - server_tr = np_utils.to_categorical(np.max(server_windows_tr, axis=1), 2) + server_tr = np.max(server_windows_tr, axis=1) if args.class_weights: logger.info("class weights: compute custom weights") @@ -274,7 +273,7 @@ def main_new_model(): logger.info("class weights: set default") custom_class_weights = None logger.info("start training") - server_tr = np.stack(np_utils.to_categorical(s, 2) for s in server_windows_tr) + server_tr = np.expand_dims(server_windows_tr, 2) model.fit([domain_tr, flow_tr], [client_tr, server_tr], batch_size=args.batch_size, diff --git a/models/pauls_networks.py b/models/pauls_networks.py index 4919cf6..fa234d1 100644 --- a/models/pauls_networks.py +++ b/models/pauls_networks.py @@ -43,7 +43,6 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") merged = keras.layers.concatenate([encoded, ipt_flows], -1) # CNN processing a small slides of flow windows - # TODO: add more layers? y = Conv1D(cnn_dims, kernel_size, activation='relu', @@ -52,8 +51,8 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le y = GlobalMaxPooling1D()(y) y = Dropout(cnnDropout)(y) y = Dense(dense_dim, activation='relu')(y) - y1 = Dense(2, activation='softmax', name="client")(y) - y2 = Dense(2, activation='softmax', name="server")(y) + y1 = Dense(1, activation='sigmoid', name="client")(y) + y2 = Dense(1, activation='sigmoid', name="server")(y) return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) @@ -64,7 +63,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") encoded = TimeDistributed(cnn)(ipt_domains) - y2 = Dense(2, activation="softmax", name="server")(encoded) + y2 = Dense(1, activation="sigmoid", name="server")(encoded) merged = keras.layers.concatenate([encoded, ipt_flows, y2], -1) y = Conv1D(cnn_dims, @@ -76,7 +75,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l y = Dropout(dropout)(y) y = Dense(dense_dim, activation='relu')(y) - y1 = Dense(2, activation='softmax', name="client")(y) + y1 = Dense(1, activation='sigmoid', name="client")(y) model = Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) return model diff --git a/models/renes_networks.py b/models/renes_networks.py index 08ae13b..44f28bc 100644 --- a/models/renes_networks.py +++ b/models/renes_networks.py @@ -9,13 +9,9 @@ def get_embedding(vocab_size, embedding_size, input_length, x = y = Input(shape=(input_length,)) y = Embedding(input_dim=vocab_size, output_dim=embedding_size)(y) y = Conv1D(filter_size, kernel_size=5, activation='relu')(y) - # NOTE: max pooling destroys information flow for embedding - # y = MaxPool1D(pool_size=3, strides=1)(y) y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) - # y = MaxPool1D(pool_size=3, strides=1)(y) y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) y = GlobalAveragePooling1D()(y) - # y = Dropout(drop_out)(y) y = Dense(hidden_dims, activation="relu")(y) return Model(x, y) @@ -38,8 +34,8 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le y = Dropout(cnnDropout)(y) y = Dense(dense_dim, activation='relu')(y) y = Dense(dense_dim // 2, activation='relu')(y) - y1 = Dense(2, activation='softmax', name="client")(y) - y2 = Dense(2, activation='softmax', name="server")(y) + y1 = Dense(1, activation='sigmoid', name="client")(y) + y2 = Dense(1, activation='sigmoid', name="server")(y) return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) @@ -50,7 +46,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") encoded = TimeDistributed(cnn)(ipt_domains) - y2 = Dense(2, activation="softmax", name="server")(encoded) + y2 = Dense(1, activation="sigmoid", name="server")(encoded) merged = keras.layers.concatenate([encoded, ipt_flows, y2], -1) y = Conv1D(cnn_dims, @@ -62,7 +58,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l y = Dropout(dropout)(y) y = Dense(dense_dim, activation='relu')(y) - y1 = Dense(2, activation='softmax', name="client")(y) + y1 = Dense(1, activation='sigmoid', name="client")(y) model = Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) return model