add staggered model training for intermediate server prediction; refactor model return values
parent 2080444fb7
commit 5bd8e41711
Makefile (11 changed lines)
@@ -1,16 +1,19 @@
run:
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test1 --epochs 10 --depth small \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test1 --epochs 2 --depth small \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type final

python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test2 --epochs 10 --depth small \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test2 --epochs 2 --depth small \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type inter

python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test3 --epochs 10 --depth medium \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test3 --epochs 2 --depth medium \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type final

python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test4 --epochs 10 --depth medium \
python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test4 --epochs 2 --depth medium \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type inter

python3 main.py --mode train --train data/rk_mini.csv.gz --model results/test5 --epochs 2 --depth small \
--hidden_char_dims 16 --domain_embd 8 --batch 64 --balanced_weights --type staggered

test:
python3 main.py --mode test --batch 128 --models results/test* --test data/rk_mini.csv.gz
main.py (65 changed lines)
@@ -6,7 +6,7 @@ import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
from keras.models import load_model
from keras.models import load_model, Model

import arguments
import dataset

@@ -123,11 +123,6 @@ def main_train(param=None):
args.train_data,
args.domain_length,
args.window)

if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param)
logger.info("define callbacks")
callbacks = []
callbacks.append(ModelCheckpoint(filepath=args.clf_model,

@@ -154,26 +149,45 @@ def main_train(param=None):

logger.info(f"select model: {args.model_type}")
if args.model_type == "staggered":
if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param)

if args.model_output == "both":
model = Model(inputs=[new_model.in_domains, new_model.in_flows],
outputs=(new_model.out_server, new_model.out_client))
else:
raise Exception("unknown model output")

server_tr = np.expand_dims(server_windows_tr, 2)
model = new_model
logger.info("compile and train model")
embedding.summary()
model.summary()
logger.info(model.get_config())

model.outputs

model.compile(optimizer='adam',
loss='binary_crossentropy',
loss_weights={"client": 0.0, "server": 1.0},
metrics=['accuracy'] + custom_metrics)

if args.model_output == "both":
labels = [client_tr, server_tr]
else:
raise ValueError("unknown model output")
model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
{"client": client_tr, "server": server_tr},
batch_size=args.batch_size,
epochs=args.epochs,
shuffle=True,
validation_split=0.2,
class_weight=custom_class_weights)

model.fit([domain_tr, flow_tr],
labels,
model.get_layer("dense_server").trainable = False
model.compile(optimizer='adam',
loss='binary_crossentropy',
loss_weights={"client": 1.0, "server": 0.0},
metrics=['accuracy'] + custom_metrics)

model.summary()
model.fit({"ipt_domains": domain_tr, "ipt_flows": flow_tr},
{"client": client_tr, "server": server_tr},
batch_size=args.batch_size,
epochs=args.epochs,
callbacks=callbacks,

@@ -182,6 +196,21 @@ def main_train(param=None):
class_weight=custom_class_weights)

else:
if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param)

if args.model_output == "both":
model = Model(inputs=[model.in_domains, model.in_flows], outputs=(model.out_client, model.out_server))
new_model = Model(inputs=[new_model.in_domains, new_model.in_flows],
outputs=(new_model.out_client, new_model.out_server))
elif args.model_output == "client":
model = Model(inputs=[model.in_domains, model.in_flows], outputs=(model.out_client,))
new_model = Model(inputs=[new_model.in_domains, new_model.in_flows], outputs=(new_model.out_client,))
else:
raise Exception("unknown model output")

if args.model_type == "inter":
server_tr = np.expand_dims(server_windows_tr, 2)
model = new_model

@@ -301,9 +330,9 @@ def main_visualization():
visualize.plot_confusion_matrix(client_val, client_pred.flatten().round(),
"{}/client_cov.png".format(args.model_path),
normalize=False, title="Client Confusion Matrix")
# visualize.plot_confusion_matrix(server_val.argmax(1), server_pred.argmax(1),
# "{}/server_cov.png".format(args.model_path),
# normalize=False, title="Server Confusion Matrix")
visualize.plot_confusion_matrix(user_vals, user_preds.flatten().round(),
"{}/user_cov.png".format(args.model_path),
normalize=False, title="User Confusion Matrix")
logger.info("visualize embedding")
domain_encs, labels = dataset.load_or_generate_domains(args.test_data, args.domain_length)
domain_embedding = np.load(args.model_path + "/domain_embds.npy")
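The staggered path above trains the two output heads in sequence: the first compile/fit pass weights only the server loss, then the dense_server layer is frozen and a second pass weights only the client loss. A minimal self-contained sketch of that scheme, with toy data and layer sizes standing in for the repo's loaders and CNN stack:

# Sketch of the staggered scheme: illustrative shapes and random data, not the repo's pipeline.
import numpy as np
from keras.layers import Input, Dense, concatenate
from keras.models import Model

n, window, feats = 256, 10, 4                          # toy dimensions
flows = np.random.rand(n, window * feats).astype("float32")
client_y = np.random.randint(0, 2, (n, 1))
server_y = np.random.randint(0, 2, (n, 1))

ipt = Input(shape=(window * feats,), name="ipt_flows")
h_server = Dense(32, activation="relu", name="dense_server")(ipt)
out_server = Dense(1, activation="sigmoid", name="server")(h_server)
h_client = Dense(32, activation="relu", name="dense_client")(concatenate([ipt, h_server]))
out_client = Dense(1, activation="sigmoid", name="client")(h_client)

model = Model(inputs=ipt, outputs=[out_client, out_server])

# Phase 1: only the server loss contributes, so the server branch is fitted first.
model.compile(optimizer="adam", loss="binary_crossentropy",
              loss_weights={"client": 0.0, "server": 1.0}, metrics=["accuracy"])
model.fit(flows, {"client": client_y, "server": server_y}, epochs=2, batch_size=64)

# Phase 2: freeze the server's hidden layer, recompile, and train only the client head.
model.get_layer("dense_server").trainable = False
model.compile(optimizer="adam", loss="binary_crossentropy",
              loss_weights={"client": 1.0, "server": 0.0}, metrics=["accuracy"])
model.fit(flows, {"client": client_y, "server": server_y}, epochs=2, batch_size=64)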
@@ -34,13 +34,13 @@ def get_models_by_params(params: dict):
embedding_model = networks.get_embedding(embedding_size, input_length, filter_embedding, kernel_embedding,
hidden_embedding, dropout)

predict_model = networks.get_model(dropout, flow_features, domain_features, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output)
old_model = networks.get_model(dropout, flow_features, domain_features, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output)

new_model = networks.get_new_model(dropout, flow_features, domain_features, window_size, domain_length,
filter_main, kernel_main, dense_dim, embedding_model, model_output)

return embedding_model, predict_model, new_model
return embedding_model, old_model, new_model


def get_metrics():
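With this refactor, get_model and get_new_model no longer return finished Keras models; they return a namedtuple of input and output tensors (in_domains, in_flows, out_client, out_server), and the caller decides which heads to wire into a trainable model. A sketch of that contract with a stand-in toy network; NetTensors and build_toy_network are illustrative names, not from the repo:

# The builders hand back tensors; the caller assembles whatever keras Model it needs.
from collections import namedtuple
from keras.layers import Input, Dense, concatenate
from keras.models import Model as KerasModel

NetTensors = namedtuple("NetTensors", ["in_domains", "in_flows", "out_client", "out_server"])

def build_toy_network(domain_dim=20, flow_dim=4):
    # stand-in for get_model()/get_new_model(); the real builders run CNNs over flow windows
    in_domains = Input(shape=(domain_dim,), name="ipt_domains")
    in_flows = Input(shape=(flow_dim,), name="ipt_flows")
    y = Dense(16, activation="relu")(concatenate([in_domains, in_flows]))
    out_client = Dense(1, activation="sigmoid", name="client")(y)
    out_server = Dense(1, activation="sigmoid", name="server")(y)
    return NetTensors(in_domains, in_flows, out_client, out_server)

net = build_toy_network()
# the caller picks the outputs, as main_train() now does per model_output setting
both = KerasModel(inputs=[net.in_domains, net.in_flows], outputs=(net.out_client, net.out_server))
client_only = KerasModel(inputs=[net.in_domains, net.in_flows], outputs=(net.out_client,))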
@@ -1,9 +1,13 @@
import keras
from keras.engine import Input, Model
from keras.engine import Input, Model as KerasModel
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed

import dataset

from collections import namedtuple

Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"])

best_config = {
"type": "paul",
"batch_size": 64,

@@ -26,7 +30,7 @@ best_config = {
}


def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5):
def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5) -> KerasModel:
x = y = Input(shape=(input_length,))
y = Embedding(input_dim=dataset.get_vocab_size(), output_dim=embedding_size)(y)
y = Conv1D(filter_size, kernel_size, activation='relu')(y)

@@ -34,11 +38,11 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
y = Dropout(drop_out)(y)
y = Dense(hidden_dims)(y)
y = Activation('relu')(y)
return Model(x, y)
return KerasModel(x, y)


def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
dense_dim, cnn, model_output="both"):
dense_dim, cnn, model_output="both") -> Model:
ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
encoded = TimeDistributed(cnn)(ipt_domains)
ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")

@@ -52,40 +56,31 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
y = GlobalMaxPooling1D()(y)
y = Dropout(cnnDropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y1 = Dense(1, activation='sigmoid', name="client")(y)
y2 = Dense(1, activation='sigmoid', name="server")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)
out_server = Dense(1, activation='sigmoid', name="server")(y)

if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
return Model(ipt_domains, ipt_flows, out_client, out_server)


def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
dense_dim, cnn, model_output="both"):
dense_dim, cnn, model_output="both") -> Model:
ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
encoded = TimeDistributed(cnn)(ipt_domains)
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
y = Dense(dense_dim, activation="relu")(merged)
y2 = Dense(1, activation="sigmoid", name="server")(y)
y = Dense(dense_dim, activation="relu", name="dense_server")(merged)
out_server = Dense(1, activation="sigmoid", name="server")(y)
merged = keras.layers.concatenate([merged, y], -1)
# CNN processing a small slides of flow windows
y = Conv1D(cnn_dims,
kernel_size,
activation='relu',
input_shape=(window_size, domain_features + flow_features))(y)
input_shape=(window_size, domain_features + flow_features))(merged)
# remove temporal dimension by global max pooling
y = GlobalMaxPooling1D()(y)
y = Dropout(dropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y = Dense(dense_dim, activation='relu', name="dense_client")(y)

y1 = Dense(1, activation='sigmoid', name="client")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)

if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
return Model(ipt_domains, ipt_flows, out_client, out_server)
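The reworked get_new_model computes a per-step dense_server representation from the merged domain/flow features, emits the intermediate server prediction from it, and concatenates those activations back into the window features before the Conv1D that feeds the client head. A rough sketch of that topology, assuming the domain input is already embedded and using illustrative layer sizes:

# Intermediate server branch feeding the client branch; sizes are illustrative only.
from keras.layers import Input, Dense, Conv1D, GlobalMaxPooling1D, Dropout, concatenate
from keras.models import Model as KerasModel

window_size, flow_features, domain_features = 10, 4, 8

ipt_domains = Input(shape=(window_size, domain_features), name="ipt_domains")  # assumed pre-embedded here
ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
merged = concatenate([ipt_domains, ipt_flows], -1)

y = Dense(64, activation="relu", name="dense_server")(merged)      # per-step hidden features
out_server = Dense(1, activation="sigmoid", name="server")(y)      # intermediate server prediction per window step
merged = concatenate([merged, y], -1)                               # feed server features forward

y = Conv1D(64, 3, activation="relu")(merged)                        # client branch over the window
y = GlobalMaxPooling1D()(y)
y = Dropout(0.5)(y)
y = Dense(64, activation="relu", name="dense_client")(y)
out_client = Dense(1, activation="sigmoid", name="client")(y)

model = KerasModel(inputs=[ipt_domains, ipt_flows], outputs=(out_client, out_server))
model.summary()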
@@ -1,10 +1,14 @@
import keras
from keras.engine import Input, Model
from keras.engine import Input, Model as KerasModel
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, TimeDistributed, MaxPool1D, \
GlobalAveragePooling1D

import dataset

from collections import namedtuple

Model = namedtuple("Model", ["in_domains", "in_flows", "out_client", "out_server"])


def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden_dims, drop_out=0.5):
x = y = Input(shape=(input_length,))

@@ -14,7 +18,7 @@ def get_embedding(embedding_size, input_length, filter_size, kernel_size, hidden
y = Conv1D(filter_size, kernel_size=3, activation='relu')(y)
y = GlobalAveragePooling1D()(y)
y = Dense(hidden_dims, activation="relu")(y)
return Model(x, y)
return KerasModel(x, y)


def get_model(cnnDropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,

@@ -35,15 +39,10 @@ def get_model(cnnDropout, flow_features, domain_features, window_size, domain_le
y = Dropout(cnnDropout)(y)
y = Dense(dense_dim, activation='relu')(y)
y = Dense(dense_dim // 2, activation='relu')(y)
y1 = Dense(1, activation='sigmoid', name="client")(y)
y2 = Dense(1, activation='sigmoid', name="server")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)
out_server = Dense(1, activation='sigmoid', name="server")(y)

if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
return Model(ipt_domains, ipt_flows, out_client, out_server)


def get_new_model(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,

@@ -53,7 +52,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
encoded = TimeDistributed(cnn)(ipt_domains)
merged = keras.layers.concatenate([encoded, ipt_flows], -1)
y = Dense(dense_dim, activation="relu")(merged)
y2 = Dense(1, activation="sigmoid", name="server")(y)
out_server = Dense(1, activation="sigmoid", name="server")(y)
# CNN processing a small slides of flow windows
y = Conv1D(filters=cnn_dims, kernel_size=kernel_size, activation='relu', padding="same",
input_shape=(window_size, domain_features + flow_features))(y)

@@ -66,11 +65,7 @@ def get_new_model(dropout, flow_features, domain_features, window_size, domain_l
y = Dropout(dropout)(y)
y = Dense(dense_dim, activation='relu')(y)

y1 = Dense(1, activation='sigmoid', name="client")(y)
out_client = Dense(1, activation='sigmoid', name="client")(y)

return Model(ipt_domains, ipt_flows, out_client, out_server)

if model_output == "both":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1, y2))
elif model_output == "client":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y1,))
elif model_output == "server":
return Model(inputs=[ipt_domains, ipt_flows], outputs=(y2,))
run.sh (6 changed lines)
@@ -7,7 +7,7 @@ DATADIR=$2

for output in client both
do
for depth in small medium
for depth in small
do
for mtype in inter final
do

@@ -29,7 +29,7 @@ do
done
done

for depth in small medium
for depth in small
do
python main.py --mode train \
--train ${DATADIR}/currentData.csv \

@@ -41,6 +41,6 @@ do
--batch 256 \
--balanced_weights \
--model_output both \
--type inter \
--type staggered \
--depth ${depth}
done