train multiple models at once

This commit is contained in:
René Knaebel 2017-11-04 17:58:21 +01:00
parent 88e3eda595
commit 14fef66a55

45
main.py
View File

@ -80,8 +80,8 @@ PARAMS = {
# TODO: remove inner global params # TODO: remove inner global params
def get_param_dist(size="small"): def get_param_dist(dist_size="small"):
if dist_type == "small": if dist_size == "small":
return { return {
# static params # static params
"type": [args.model_type], "type": [args.model_type],
@ -180,11 +180,7 @@ def train(parameters, features, labels):
pass pass
def main_train(param=None): def load_data(data, domain_length, window_size, model_type):
logger.info(f"Create model path {args.model_path}")
exists_or_make_path(args.model_path)
logger.info(f"Use command line arguments: {args}")
# data preparation # data preparation
domain_tr, flow_tr, name_tr, client_tr, server_windows_tr = dataset.load_or_generate_h5data(args.data, domain_tr, flow_tr, name_tr, client_tr, server_windows_tr = dataset.load_or_generate_h5data(args.data,
args.data, args.data,
@ -193,22 +189,38 @@ def main_train(param=None):
server_tr = np.max(server_windows_tr, axis=1) server_tr = np.max(server_windows_tr, axis=1)
if args.model_type in ("inter", "staggered"): if args.model_type in ("inter", "staggered"):
server_tr = np.expand_dims(server_windows_tr, 2) server_tr = np.expand_dims(server_windows_tr, 2)
return domain_tr, flow_tr, client_tr, server_tr
def main_train(param=None):
logger.info(f"Create model path {args.model_path}")
exists_or_make_path(args.model_path)
logger.info(f"Use command line arguments: {args}")
# data preparation
domain_tr, flow_tr, client_tr, server_tr = load_data(args.data, args.domain_length,
args.window, args.model_type)
# call hyperband if used # call hyperband if used
if args.hyperband_results: if args.hyperband_results:
logger.info("start hyperband parameter search") logger.info("start hyperband parameter search")
hyper_results = run_hyperband("small", domain_tr, flow_tr, client_tr, server_tr, 81, args.hyperband_results) hyper_results = run_hyperband("small", domain_tr, flow_tr, client_tr, server_tr, 81, args.hyperband_results)
param = sorted(hyper_results, key=operator.itemgetter("loss"))[0] param = sorted(hyper_results, key=operator.itemgetter("loss"))[0]["params"]
logger.info(f"select params from result: {param}") logger.info(f"select params from result: {param}")
if not param:
param = PARAMS
for i in range(20):
model_path = os.path.join(args.model_path, f"clf_{i}.h5")
train_log_path = os.path.join(args.model_path, "train_{i}.log.csv")
# define training call backs # define training call backs
logger.info("define callbacks") logger.info("define callbacks")
callbacks = [] callbacks = []
callbacks.append(ModelCheckpoint(filepath=args.clf_model, callbacks.append(ModelCheckpoint(filepath=model_path,
monitor='loss', monitor='loss',
verbose=False, verbose=False,
save_best_only=True)) save_best_only=True))
callbacks.append(CSVLogger(args.train_log)) callbacks.append(CSVLogger(train_log_path))
logger.info(f"Use early stopping: {args.stop_early}") logger.info(f"Use early stopping: {args.stop_early}")
if args.stop_early: if args.stop_early:
callbacks.append(EarlyStopping(monitor='val_loss', callbacks.append(EarlyStopping(monitor='val_loss',
@ -233,8 +245,6 @@ def main_train(param=None):
logger.info("class weights: set default") logger.info("class weights: set default")
custom_sample_weights = None custom_sample_weights = None
if not param:
param = PARAMS
logger.info(f"Generator model with params: {param}") logger.info(f"Generator model with params: {param}")
embedding, model, new_model = models.get_models_by_params(param) embedding, model, new_model = models.get_models_by_params(param)
@ -470,15 +480,6 @@ def main_visualization():
normalize=True, title="User Confusion Matrix") normalize=True, title="User Confusion Matrix")
# plot_embedding(args.model_path, results["domain_embds"], args.data, args.domain_length)
# def plot_embedding(model_path, domain_embedding, data, domain_length):
# logger.info("visualize embedding")
# domain_encs, labels = dataset.load_or_generate_domains(data, domain_length)
# visualize.plot_embedding(domain_embedding, labels, path="{}/embd_svd.png".format(model_path), method="svd")
def main_visualize_all(): def main_visualize_all():
_, _, name_val, hits_vt, hits_trusted, server_val = dataset.load_or_generate_raw_h5data(args.data, _, _, name_val, hits_vt, hits_trusted, server_val = dataset.load_or_generate_raw_h5data(args.data,
args.data, args.data,
@ -706,6 +707,7 @@ def main_beta():
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
def plot_overall_result(): def plot_overall_result():
path, model_prefix = os.path.split(os.path.normpath(args.output_prefix)) path, model_prefix = os.path.split(os.path.normpath(args.output_prefix))
try: try:
@ -816,7 +818,6 @@ def main_stats2():
print() print()
def main(): def main():
if "train" == args.mode: if "train" == args.mode:
main_train() main_train()