From b1f48c1895531a249b177dea120bd8c64f16902d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Knaebel?= Date: Mon, 6 Nov 2017 21:51:49 +0100 Subject: [PATCH] add soft parameter sharing network --- hyperband.py | 17 ++++++++++++++- main.py | 17 ++++++++++++++- models/__init__.py | 5 ++++- models/pauls_networks.py | 46 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) diff --git a/hyperband.py b/hyperband.py index 62d2931..ad2318a 100644 --- a/hyperband.py +++ b/hyperband.py @@ -8,6 +8,7 @@ from random import random as rng from time import ctime, time import joblib +import keras.backend as K import numpy as np from keras.callbacks import EarlyStopping @@ -47,14 +48,28 @@ class Hyperband: def try_params(self, n_iterations, params): n_iterations = int(round(n_iterations)) - embedding, model, new_model, long_model = models.get_models_by_params(params) + embedding, model, new_model, long_model, soft_model = models.get_models_by_params(params) if params["type"] in ("inter", "staggered"): model = new_model if params["type"] == "long": model = long_model + if params["type"] == "soft": + model = soft_model + model = create_model(model, params["model_output"]) + if params["type"] == "soft": + conv_server = model.get_layer("conv_server").trainable_weights + conv_client = model.get_layer("conv_client").trainable_weights + l1 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(conv_server, conv_client)] + model.add_loss(l1) + + dense_server = model.get_layer("dense_server").trainable_weights + dense_client = model.get_layer("dense_client").trainable_weights + l2 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(dense_server, dense_client)] + model.add_loss(l2) + callbacks = [EarlyStopping(monitor='val_loss', patience=5, verbose=False)] diff --git a/main.py b/main.py index c5ae20a..8242969 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,7 @@ import operator import os import joblib +import keras.backend as K import numpy as np import pandas 
as pd import tensorflow as tf @@ -246,13 +247,27 @@ def main_train(param=None): custom_sample_weights = None logger.info(f"Generator model with params: {param}") - embedding, model, new_model, long_model = models.get_models_by_params(param) + embedding, model, new_model, long_model, soft_model = models.get_models_by_params(param) if args.model_type in ("inter", "staggered"): model = new_model if args.model_type == "long": model = long_model + if args.model_type == "soft": + model = soft_model + model = create_model(model, args.model_output) + + if args.model_type == "soft": + conv_server = model.get_layer("conv_server").trainable_weights + conv_client = model.get_layer("conv_client").trainable_weights + l1 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(conv_server, conv_client)] + model.add_loss(l1) + + dense_server = model.get_layer("dense_server").trainable_weights + dense_client = model.get_layer("dense_client").trainable_weights + l2 = [0.001 * K.sum(K.abs(x - y)) for (x, y) in zip(dense_server, dense_client)] + model.add_loss(l2) features = {"ipt_domains": domain_tr.value, "ipt_flows": flow_tr.value} if args.model_output == "both": diff --git a/models/__init__.py b/models/__init__.py index 2919241..58ff50d 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -46,7 +46,10 @@ def get_models_by_params(params: dict): long = networks.get_new_model2(0.25, flow_features, hidden_embedding, window_size, domain_length, filter_main, kernel_main, dense_dim, embedding_model, model_output) - return embedding_model, final, inter, long + soft = networks.get_new_soft(0.25, flow_features, hidden_embedding, window_size, domain_length, + filter_main, kernel_main, dense_dim, embedding_model, model_output) + + return embedding_model, final, inter, long, soft def get_server_model_by_params(params: dict): diff --git a/models/pauls_networks.py b/models/pauls_networks.py index 39d4320..c670a5a 100644 --- a/models/pauls_networks.py +++ b/models/pauls_networks.py @@ -135,3 
+135,49 @@ def get_new_model2(dropout, flow_features, domain_features, window_size, domain_
     out_client = Dense(1, activation='sigmoid', name="client")(y)
 
     return Model(ipt_domains, ipt_flows, out_client, out_server)
+
+
+import keras.backend as K  # NOTE(review): not used by get_new_soft; drop if nothing else in this module needs it
+
+
+def get_new_soft(dropout, flow_features, domain_features, window_size, domain_length, cnn_dims, kernel_size,
+                 dense_dim, cnn, model_output="both") -> Model:
+    """Build the soft parameter sharing network with separate server and client towers.
+
+    Both towers read the same merged tensor (the TimeDistributed ``cnn`` encoding of
+    the domains, concatenated with the flows on the last axis) and each runs its own
+    Conv1D -> GlobalMaxPooling1D -> Dropout -> Dense stack. No penalty is attached
+    here: hyperband.py and main.py look the layers up by the names "conv_server" /
+    "conv_client" and "dense_server" / "dense_client" to add the weight-distance loss.
+
+    ``domain_features`` and ``model_output`` are accepted but not read by this function.
+
+    Returns ``Model(ipt_domains, ipt_flows, out_client, out_server)``: the two inputs
+    followed by the client and server sigmoid outputs.
+    """
+    ipt_domains = Input(shape=(window_size, domain_length), name="ipt_domains")
+    ipt_flows = Input(shape=(window_size, flow_features), name="ipt_flows")
+    encoded = TimeDistributed(cnn, name="domain_cnn")(ipt_domains)
+    merged = keras.layers.concatenate([encoded, ipt_flows], -1)
+
+    # server tower
+    y = Conv1D(cnn_dims,
+               kernel_size,
+               activation='relu', name="conv_server")(merged)
+    # remove temporal dimension by global max pooling
+    y = GlobalMaxPooling1D()(y)
+    y = Dropout(dropout)(y)
+    y = Dense(dense_dim, activation="relu", name="dense_server")(y)
+    out_server = Dense(1, activation="sigmoid", name="server")(y)
+
+    # client tower: same layout and input as the server tower, separate weights
+    y = Conv1D(cnn_dims,
+               kernel_size,
+               activation='relu', name="conv_client")(merged)
+    # remove temporal dimension by global max pooling
+    y = GlobalMaxPooling1D()(y)
+    y = Dropout(dropout)(y)
+    y = Dense(dense_dim, activation='relu', name="dense_client")(y)
+    out_client = Dense(1, activation='sigmoid', name="client")(y)
+
+    return Model(ipt_domains, ipt_flows, out_client, out_server)