From 5bd8e4171191700e44424bcc4f84e5fc6b9988d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Thu, 7 Sep 2017 14:24:55 +0200 Subject: [PATCH] add staggered model training for intermediate server prediction; refactor model return values --- Makefile | 11 ++++--- main.py | 65 +++++++++++++++++++++++++++++----------- models/__init__.py | 6 ++-- models/pauls_networks.py | 43 ++++++++++++-------------- models/renes_networks.py | 31 ++++++++----------- run.sh | 6 ++-- 6 files changed, 92 insertions(+), 70 deletions(-) diff --git a/Makefile b/Makefile index d342f81..4e9e6c9 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,19 @@ run: - python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test1 --epochs 10 --depth small \ + python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test1 --epochs 2 --depth small \ --hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type final - python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test2 --epochs 10 --depth small \ + python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test2 --epochs 2 --depth small \ --hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type inter - python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test3 --epochs 10 --depth medium \ + python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test3 --epochs 2 --depth medium \ --hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type final - python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test4 --epochs 10 --depth medium \ + python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test4 --epochs 2 --depth medium \ --hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type inter + python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test5 --epochs 2 --depth small \ + --hidden_char_dims 16 --domain_embd 8 
--batch 64 --balanced_weights --type staggered + test: python3 main.py --mode test --batch 128 --models results/test* --test data/rk_mini.csv.gz diff --git a/main.py b/main.py index 42cbe5c..43247c5 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import tensorflow as tf from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping -from keras.models import load_model +from keras.models import load_model, Model import arguments import dataset @@ -123,11 +123,6 @@ def main_train(param=None): args.train_data, args.domain_length, args.window) - - if not param: - param = PARAMS - logger.info(f"Generator model with params: {param}") - embedding, model, new_model = models.get_models_by_params(param) logger.info("define callbacks") callbacks = [] callbacks.append(ModelCheckpoint(filepath=args.clf_model, @@ -154,26 +149,45 @@ def main_train(param=None): logger.info(f"select model: {args.model_type}") if args.model_type == "staggered": + if not param: + param = PARAMS + logger.info(f"Generator model with params: {param}") + embedding, model, new_model = models.get_models_by_params(param) + + if args.model_output == "both": + model = Model(inputs=[new_model.in_domains, new_model.in_flows], + outputs=(new_model.out_server, new_model.out_client)) + else: + raise Exception("unknown model output") + server_tr = np.expand_dims(server_windows_tr, 2) - model = new_model logger.info("compile and train model") embedding.summary() model.summary() logger.info(model.get_config()) - model.outputs - model.compile(optimizer='adam', loss='binary_crossentropy', + loss_weights={"client": 0.0, "server": 1.0}, metrics=['accuracy'] + custom_metrics) - if args.model_output == "both": - labels = [client_tr, server_tr] - else: - raise ValueError("unknown model output") + model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr}, + {"client": client_tr, "server": server_tr}, + batch_size=args.batch_size, + epochs=args.epochs, + shuffle=True, + 
validation_split=0.2, + class_weight=custom_class_weights) - model.fit([domain_tr, flow_tr], - labels, + model.get_layer("dense_server").trainable = False + model.compile(optimizer='adam', + loss='binary_crossentropy', + loss_weights={"client": 1.0, "server": 0.0}, + metrics=['accuracy'] + custom_metrics) + + model.summary() + model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr}, + {"client": client_tr, "server": server_tr}, batch_size=args.batch_size, epochs=args.epochs, callbacks=callbacks, @@ -182,6 +196,21 @@ def main_train(param=None): class_weight=custom_class_weights) else: + if not param: + param = PARAMS + logger.info(f"Generator model with params: {param}") + embedding, model, new_model = models.get_models_by_params(param) + + if args.model_output == "both": + model = Model(inputs=[model.in_domains, model.in_flows], outputs=(model.out_client, model.out_server)) + new_model = Model(inputs=[new_model.in_domains, new_model.in_flows], + outputs=(new_model.out_client, new_model.out_server)) + elif args.model_output == "client": + model = Model(inputs=[model.in_domains, model.in_flows], outputs=(model.out_client,)) + new_model = Model(inputs=[new_model.in_domains, new_model.in_flows], outputs=(new_model.out_client,)) + else: + raise Exception("unknown model output") + if args.model_type == "inter": server_tr = np.expand_dims(server_windows_tr, 2) model = new_model @@ -301,9 +330,9 @@ def main_visualization(): visualize.plot_confusion_matrix(client_val, client_pred.flatten().round(), "{}/client_cov.png".format(args.model_path), normalize=False, title="Client Confusion Matrix") - # visualize.plot_confusion_matrix(server_val.argmax(1), server_pred.argmax(1), - # "{}/server_cov.png".format(args.model_path), - # normalize=False, title="Server Confusion Matrix") + visualize.plot_confusion_matrix(user_vals, user_preds.flatten().round(), + "{}/user_cov.png".format(args.model_path), + normalize=False, title="User Confusion Matrix") logger.info("visualize 
embedding") domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length) domain_embedding = np.load(args.model_path + "/domain_embds.npy") diff --git a/models/__init__.py b/models/__init__.py index d55fd18..446647e 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -34,13 +34,13 @@ def get_models_by_params(params: dict): embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding, hidden_embedding, dropout) - predict_model = networks.get_model(dropout, flow_features, domain_features, window_size, domain_length, - filter_main, kernel_main, dense_dim, embedding_model, model_output) + old_model = networks.get_model(dropout, flow_features, domain_features, window_size, domain_length, + filter_main, kernel_main, dense_dim, embedding_model, model_output) new_model = networks.get_new_model(dropout, flow_features, domain_features, window_size, domain_length, filter_main, kernel_main, dense_dim, embedding_model, model_output) - return embedding_model, predict_model, new_model + return embedding_model, old_model, new_model def get_metrics(): diff --git a/models/pauls_networks.py b/models/pauls_networks.py index 779354a..fefc040 100644 --- a/models/pauls_networks.py +++ b/models/pauls_networks.py @@ -1,9 +1,13 @@ import keras -from keras.engine import Input, Model +from keras.engine import Input, Model as KerasModel from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed import dataset +from collections import namedtuple + +Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"]) + best_config = { "type": "paul", "batch_size": 64, @@ -26,7 +30,7 @@ best_config = { } -def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5): +def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5) -> KerasModel: x = y = 
Input(shape=(input_length,)) y = Embedding(input_dim=dataset.get_vocab_size(), output_dim=embedding_size)(y) y = Conv1D(filter_size, kernel_size, activation='relu')(y) @@ -34,11 +38,11 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden y = Dropout(drop_out)(y) y = Dense(hidden_dims)(y) y = Activation('relu')(y) - return Model(x, y) + return KerasModel(x, y) def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, - dense_dim, cnn, model_output="both"): + dense_dim, cnn, model_output="both") -> Model: ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") encoded = TimeDistributed(cnn)(ipt_domains) ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") @@ -52,40 +56,31 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le y = GlobalMaxPooling1D()(y) y = Dropout(cnnDropout)(y) y = Dense(dense_dim, activation='relu')(y) - y1 = Dense(1, activation='sigmoid', name="client")(y) - y2 = Dense(1, activation='sigmoid', name="server")(y) + out_client = Dense(1, activation='sigmoid', name="client")(y) + out_server = Dense(1, activation='sigmoid', name="server")(y) - if model_output == "both": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) - elif model_output == "client": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,)) - elif model_output == "server": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,)) + return Model(ipt_domains, ipt_flows, out_client, out_server) def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, - dense_dim, cnn, model_output="both"): + dense_dim, cnn, model_output="both") -> Model: ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains") ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows") encoded = TimeDistributed(cnn)(ipt_domains) merged = 
keras.layers.concatenate([encoded, ipt_flows], -1) - y = Dense(dense_dim, activation="relu")(merged) - y2 = Dense(1, activation="sigmoid", name="server")(y) + y = Dense(dense_dim, activation="relu", name="dense_server")(merged) + out_server = Dense(1, activation="sigmoid", name="server")(y) + merged = keras.layers.concatenate([merged, y], -1) # CNN processing a small slides of flow windows y = Conv1D(cnn_dims, kernel_size, activation='relu', - input_shape=(window_size, domain_features + flow_features))(y) + input_shape=(window_size, domain_features + flow_features))(merged) # remove temporal dimension by global max pooling y = GlobalMaxPooling1D()(y) y = Dropout(dropout)(y) - y = Dense(dense_dim, activation='relu')(y) + y = Dense(dense_dim, activation='relu', name="dense_client")(y) - y1 = Dense(1, activation='sigmoid', name="client")(y) + out_client = Dense(1, activation='sigmoid', name="client")(y) - if model_output == "both": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) - elif model_output == "client": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,)) - elif model_output == "server": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,)) + return Model(ipt_domains, ipt_flows, out_client, out_server) diff --git a/models/renes_networks.py b/models/renes_networks.py index 96b684d..a3e29f6 100644 --- a/models/renes_networks.py +++ b/models/renes_networks.py @@ -1,10 +1,14 @@ import keras -from keras.engine import Input, Model +from keras.engine import Input, Model as KerasModel from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, TimeDistributed, MaxPool1D, \ GlobalAveragePooling1D import dataset +from collections import namedtuple + +Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"]) + def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5): x = y = Input(shape=(input_length,)) @@ -14,7 +18,7 @@ def 
get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden y = Conv1D(filter_size, kernel_size=3, activation='relu')(y) y = GlobalAveragePooling1D()(y) y = Dense(hidden_dims, activation="relu")(y) - return Model(x, y) + return KerasModel(x, y) def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, @@ -35,15 +39,10 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le y = Dropout(cnnDropout)(y) y = Dense(dense_dim, activation='relu')(y) y = Dense(dense_dim // 2, activation='relu')(y) - y1 = Dense(1, activation='sigmoid', name="client")(y) - y2 = Dense(1, activation='sigmoid', name="server")(y) + out_client = Dense(1, activation='sigmoid', name="client")(y) + out_server = Dense(1, activation='sigmoid', name="server")(y) - if model_output == "both": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) - elif model_output == "client": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,)) - elif model_output == "server": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,)) + return Model(ipt_domains, ipt_flows, out_client, out_server) def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size, @@ -53,7 +52,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l encoded = TimeDistributed(cnn)(ipt_domains) merged = keras.layers.concatenate([encoded, ipt_flows], -1) y = Dense(dense_dim, activation="relu")(merged) - y2 = Dense(1, activation="sigmoid", name="server")(y) + out_server = Dense(1, activation="sigmoid", name="server")(y) # CNN processing a small slides of flow windows y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same", input_shape=(window_size, domain_features + flow_features))(y) @@ -66,11 +65,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l y = Dropout(dropout)(y) y = 
Dense(dense_dim, activation='relu')(y) - y1 = Dense(1, activation='sigmoid', name="client")(y) + out_client = Dense(1, activation='sigmoid', name="client")(y) + + return Model(ipt_domains, ipt_flows, out_client, out_server) - if model_output == "both": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2)) - elif model_output == "client": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,)) - elif model_output == "server": - return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,)) diff --git a/run.sh b/run.sh index 94cdf88..77d3346 100644 --- a/run.sh +++ b/run.sh @@ -7,7 +7,7 @@ DATADIR=$2 for output in client both do - for depth in small medium + for depth in small do for mtype in inter final do @@ -29,7 +29,7 @@ do done done -for depth in small medium +for depth in small do python main.py --mode train \ --train ${DATADIR}/currentData.csv \ @@ -41,6 +41,6 @@ do --batch 256 \ --balanced_weights \ --model_output both \ - --type inter \ + --type staggered \ --depth ${depth} done \ No newline at end of file