From 7c05ef6a1267cce57dc7d0bb48e7b8e3992e60c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Wed, 5 Jul 2017 18:10:22 +0200 Subject: [PATCH] refactor models package: create separate modules for pauls and renes networks --- main.py | 14 +++---- models.py | 81 ---------------------------------------- models/__init__.py | 2 + models/pauls_networks.py | 37 ++++++++++++++++++ models/renes_networks.py | 40 ++++++++++++++++++++ 5 files changed, 84 insertions(+), 90 deletions(-) delete mode 100644 models.py create mode 100644 models/__init__.py create mode 100644 models/pauls_networks.py create mode 100644 models/renes_networks.py diff --git a/main.py b/main.py index 7d95719..c5e5862 100644 --- a/main.py +++ b/main.py @@ -87,10 +87,6 @@ def main(): kernel_size = 3 drop_out = 0.5 filters = 128 - hidden_dims = 100 - vocabSize = 40 - threshold = 3 - minFlowsPerUser = 10 char_dict = dataset.get_character_dict() user_flow_df = dataset.get_user_flow_data() @@ -110,13 +106,13 @@ def main(): client_labels = client_labels[idx] server_labels = server_tr[idx] - shared_cnn = models.get_embedding_network_rene(len(char_dict) + 1, args.embedding, args.domain_length, - args.hidden_char_dims, args.domain_embedding, 0.5) + shared_cnn = models.renes_networks.get_embedding(len(char_dict) + 1, args.embedding, args.domain_length, + args.hidden_char_dims, kernel_size, args.domain_embedding, 0.5) shared_cnn.summary() - model = models.get_top_cnn_rene(cnnDropout, flowFeatures, args.domain_embedding, - args.window, args.domain_length, filters, kernel_size, - cnnHiddenDims, shared_cnn) + model = models.renes_networks.get_model(cnnDropout, flowFeatures, args.domain_embedding, + args.window, args.domain_length, filters, kernel_size, + cnnHiddenDims, shared_cnn) model.summary() model.compile(optimizer='adam', diff --git a/models.py b/models.py deleted file mode 100644 index f381310..0000000 --- a/models.py +++ /dev/null @@ -1,81 +0,0 @@ -import keras -from keras.engine import Input, Model -from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed, MaxPool1D - - -# designed by paul -def get_embedding_network_paul(vocab_size, embedding_size, input_length, filters, kernel_size, - hidden_dims, drop_out=0.5): - x = y = Input(shape=(input_length,)) - y = Embedding(input_dim=vocab_size, output_dim=embedding_size)(y) - y = Conv1D(filters, kernel_size, activation='relu')(y) - y = GlobalMaxPooling1D()(y) - y = Dense(hidden_dims)(y) - y = Dropout(drop_out)(y) - y = Activation('relu')(y) - return Model(x, y) - - -def get_embedding_network_rene(vocab_size, embedding_size, input_length, - hidden_char_dims, hidden_dims, drop_out=0.5): - x = y = Input(shape=(input_length,)) - y = Embedding(input_dim=vocab_size, output_dim=embedding_size, mask_zero=True)(y) - y = Conv1D(hidden_char_dims, kernel_size=5, activation='relu')(y) - y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(hidden_char_dims, kernel_size=3, activation='relu')(y) - y = MaxPool1D(pool_size=3, strides=1)(y) - y = Conv1D(hidden_char_dims, kernel_size=3, activation='relu')(y) - y = GlobalMaxPooling1D()(y) - y = Dense(hidden_dims)(y) - y = Dropout(drop_out)(y) - y = Activation('relu')(y) - return Model(x, y) - - -def get_full_model(vocabSize, embeddingSize, maxLen, domainFeatures, flowFeatures, - filters, h1, h2, dropout, dense): - pass - - -# designed by paul -def get_top_cnn(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, - dense_dim, - cnn): - ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") - encoded = TimeDistributed(cnn)(ipt_domains) - ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") - merged = keras.layers.concatenate([encoded, ipt_flows], -1) - # CNN processing a small slides of flow windows - # TODO: add more layers? - y = Conv1D(cnn_dims, - kernel_size, - activation='relu', - input_shape=(window_size, domain_features + flow_features))(merged) - # remove temporal dimension by global max pooling - y = GlobalMaxPooling1D()(y) - y = Dropout(cnnDropout)(y) - y = Dense(dense_dim, activation='relu')(y) - y1 = Dense(2, activation='softmax', name="client")(y) - y2 = Dense(2, activation='softmax', name="server")(y) - - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) - - -def get_top_cnn_rene(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, - dense_dim, cnn): - ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") - encoded = TimeDistributed(cnn)(ipt_domains) - ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") - merged = keras.layers.concatenate([encoded, ipt_flows], -1) - # CNN processing a small slides of flow windows - # TODO: add more layers? - y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', - input_shape=(window_size, domain_features + flow_features))(merged) - # remove temporal dimension by global max pooling - y = GlobalMaxPooling1D()(y) - y = Dropout(cnnDropout)(y) - y = Dense(dense_dim, activation='relu')(y) - y1 = Dense(2, activation='softmax', name="client")(y) - y2 = Dense(2, activation='softmax', name="server")(y) - - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..1bd832e --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,2 @@ +from . import pauls_networks +from . import renes_networks diff --git a/models/pauls_networks.py b/models/pauls_networks.py new file mode 100644 index 0000000..837fc02 --- /dev/null +++ b/models/pauls_networks.py @@ -0,0 +1,37 @@ +import keras +from keras.engine import Input, Model +from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed + + +def get_embedding(vocab_size, embedding_size, input_length, + filters, kernel_size, hidden_dims, drop_out=0.5): + x = y = Input(shape=(input_length,)) + y = Embedding(input_dim=vocab_size, output_dim=embedding_size)(y) + y = Conv1D(filters, kernel_size, activation='relu')(y) + y = GlobalMaxPooling1D()(y) + y = Dense(hidden_dims)(y) + y = Dropout(drop_out)(y) + y = Activation('relu')(y) + return Model(x, y) + + +def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, + dense_dim, cnn): + ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") + encoded = TimeDistributed(cnn)(ipt_domains) + ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") + merged = keras.layers.concatenate([encoded, ipt_flows], -1) + # CNN processing a small slides of flow windows + # TODO: add more layers? + y = Conv1D(cnn_dims, + kernel_size, + activation='relu', + input_shape=(window_size, domain_features + flow_features))(merged) + # remove temporal dimension by global max pooling + y = GlobalMaxPooling1D()(y) + y = Dropout(cnnDropout)(y) + y = Dense(dense_dim, activation='relu')(y) + y1 = Dense(2, activation='softmax', name="client")(y) + y2 = Dense(2, activation='softmax', name="server")(y) + + return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) diff --git a/models/renes_networks.py b/models/renes_networks.py new file mode 100644 index 0000000..aaaefa6 --- /dev/null +++ b/models/renes_networks.py @@ -0,0 +1,40 @@ +import keras +from keras.engine import Input, Model +from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, TimeDistributed, MaxPool1D + + +def get_embedding(vocab_size, embedding_size, input_length, + hidden_char_dims, kernel_size, hidden_dims, drop_out=0.5): + x = y = Input(shape=(input_length,)) + y = Embedding(input_dim=vocab_size, output_dim=embedding_size)(y) + y = Conv1D(hidden_char_dims, kernel_size=5, activation='relu')(y) + y = MaxPool1D(pool_size=3, strides=1)(y) + y = Conv1D(hidden_char_dims, kernel_size=3, activation='relu')(y) + y = MaxPool1D(pool_size=3, strides=1)(y) + y = Conv1D(hidden_char_dims, kernel_size=3, activation='relu')(y) + y = GlobalMaxPooling1D()(y) + y = Dropout(drop_out)(y) + y = Dense(hidden_dims, activation="relu")(y) + return Model(x, y) + + +def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, + dense_dim, cnn): + ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") + encoded = TimeDistributed(cnn)(ipt_domains) + ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") + merged = keras.layers.concatenate([encoded, ipt_flows], -1) + # CNN processing a small slides of flow windows + # TODO: add more layers? + y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', + input_shape=(window_size, domain_features + flow_features))(merged) + y = MaxPool1D(pool_size=3, strides=1)(y) + y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu')(y) + # remove temporal dimension by global max pooling + y = GlobalMaxPooling1D()(y) + y = Dropout(cnnDropout)(y) + y = Dense(dense_dim, activation='relu')(y) + y1 = Dense(2, activation='softmax', name="client")(y) + y2 = Dense(2, activation='softmax', name="server")(y) + + return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))