added params

parent 3c4be52bb6
commit be56112b33
Makefile (4 changed lines)
@@ -1,3 +1,5 @@
 test:
-	python3 main.py --epochs 1 --batch 64 --train data/rk_data.csv.gz --test data/rk_data.csv.gz
+	python3 main.py --modes train --epochs 1 --batch 64 --train data/rk_data.csv.gz
 
+hyper:
+	python3 main.py --modes hyperband --epochs 1 --batch 64 --train data/rk_data.csv.gz
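Both targets now route through the new --modes switch in main.py: `make test` runs a single training epoch, while the new `make hyper` target launches the hyperband search on the same gzipped training data.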
hyperband.py (174 changed lines)
@@ -1,76 +1,128 @@
 # -*- coding: utf-8 -*-
 # implementation of hyperband:
 # https://arxiv.org/pdf/1603.06560.pdf
+import random
+from math import log, ceil
+from random import random as rng
+from time import time, ctime
 
 import numpy as np
 
-
-def get_hyperparameter_configuration(configGenerator, n):
-    configurations = []
-    for i in np.arange(0, n, 1):
-        configurations.append(configGenerator())
-    return configurations
-
-
-def run_then_return_val_loss(config, r_i, modelGenerator, trainData, trainLabel,
-                             testData, testLabel):
-    # parameter
-    batch_size = 128
-    model = modelGenerator(config)
-    if model != None:
-        model.fit(x=trainData, y=trainLabel,
-                  epochs=int(r_i), shuffle=True, initial_epoch=0,
-                  batch_size=batch_size)
-        score = model.evaluate(testData, testLabel,
-                               batch_size=batch_size)
-        score = score[0]
-    else:
-        score = np.infty
-    return score
-
-
-def top_k(configurations, L, k):
-    outConfigs = []
-    sortIDs = np.argsort(np.array(L))
-    for i in np.arange(0, k, 1):
-        outConfigs.append(configurations[sortIDs[i]])
-    return outConfigs
-
-
-def hyperband(R, nu, modelGenerator,
-              configGenerator,
-              trainData, trainLabel,
-              testData, testLabel,
-              outputFile=''):
-    allLosses = []
-    allConfigs = []
-    # input
-    # initialization
-    s_max = np.floor(np.log(R) / np.log(nu))
-    B = (s_max + 1) * R
-    for s in np.arange(s_max, -1, -1):
-        n = np.ceil(np.float(B) / np.float(R) * (np.float(np.power(nu, s)) / np.float(s + 1)))
-        r = np.float(R) * np.power(nu, -s)
-        configurations = get_hyperparameter_configuration(configGenerator, n)
-        for i in np.arange(0, s + 1, 1):
-            n_i = np.floor(np.float(n) * np.power(nu, -i))
-            r_i = np.float(r) * np.power(nu, i)
-            L = []
-            for config in configurations:
-                curLoss = run_then_return_val_loss(config, r_i, modelGenerator,
-                                                   trainData, trainLabel,
-                                                   testData, testLabel)
-                L.append(curLoss)
-                allLosses.append(curLoss)
-                allConfigs.append(config)
-                if outputFile != '':
-                    with open(outputFile, 'a') as myfile:
-                        myfile.write(str(config) + '\t' + str(curLoss) + \
-                                     '\t' + str(r_i) + '\n')
-            configurations = top_k(configurations, L, np.floor(np.float(n_i) / nu))
-            # print('n_i: ' + str(n_i))
-            # print('r_i: ' + str(r_i))
-    bestConfig = top_k(allConfigs, allLosses, 1)
-    return (bestConfig[0], allConfigs, allLosses)
+import models
+
+
+def sample_params(param_distribution: dict):
+    p = {}
+    for key, val in param_distribution.items():
+        p[key] = random.choice(val)
+    return p
+
+
+class Hyperband:
+    def __init__(self, param_distribution, X, y):
+        self.get_params = lambda: sample_params(param_distribution)
+
+        self.max_iter = 81  # maximum iterations per configuration
+        self.eta = 3  # defines configuration downsampling rate (default = 3)
+
+        self.logeta = lambda x: log(x) / log(self.eta)
+        self.s_max = int(self.logeta(self.max_iter))
+        self.B = (self.s_max + 1) * self.max_iter
+
+        self.results = []  # list of dicts
+        self.counter = 0
+        self.best_loss = np.inf
+        self.best_counter = -1
+
+        self.X = X
+        self.y = y
+
+    def try_params(self, n_iterations, params):
+        n_iterations = int(round(n_iterations))
+        embedding, model = models.get_models_by_params(params)
+        model.compile(optimizer='adam',
+                      loss='categorical_crossentropy',
+                      metrics=['accuracy'])
+
+        history = model.fit(self.X,
+                            self.y,
+                            batch_size=params["batch_size"],
+                            epochs=n_iterations,
+                            shuffle=True,
+                            validation_split=0.2)
+
+        return {"loss": history.history['loss'][-1]}
+
+    # can be called multiple times
+    def run(self, skip_last=0, dry_run=False):
+        for s in reversed(range(self.s_max + 1)):
+            # initial number of configurations
+            n = int(ceil(self.B / self.max_iter / (s + 1) * self.eta ** s))
+
+            # initial number of iterations per config
+            r = self.max_iter * self.eta ** (-s)
+
+            # n random configurations
+            T = [self.get_params() for i in range(n)]
+
+            for i in range((s + 1) - int(skip_last)):  # changed from s + 1
+                # run each of the n configs for <n_iterations>
+                # and keep the best (n_configs / eta) configurations
+                n_configs = n * self.eta ** (-i)
+                n_iterations = r * self.eta ** (i)
+
+                print("\n*** {} configurations x {:.1f} iterations each".format(
+                    n_configs, n_iterations))
+
+                val_losses = []
+                early_stops = []
+
+                for t in T:
+                    self.counter += 1
+                    print("\n{} | {} | lowest loss so far: {:.4f} (run {})\n".format(
+                        self.counter, ctime(), self.best_loss, self.best_counter))
+
+                    start_time = time()
+
+                    if dry_run:
+                        result = {'loss': rng(), 'log_loss': rng(), 'auc': rng()}
+                    else:
+                        result = self.try_params(n_iterations, t)  # <---
+
+                    assert (type(result) == dict)
+                    assert ('loss' in result)
+
+                    seconds = int(round(time() - start_time))
+                    print("\n{} seconds.".format(seconds))
+
+                    loss = result['loss']
+                    val_losses.append(loss)
+
+                    early_stop = result.get('early_stop', False)
+                    early_stops.append(early_stop)
+
+                    # keep track of the best result so far (for display only);
+                    # could do it by checking results each time, but hey
+                    if loss < self.best_loss:
+                        self.best_loss = loss
+                        self.best_counter = self.counter
+
+                    result['counter'] = self.counter
+                    result['seconds'] = seconds
+                    result['params'] = t
+                    result['iterations'] = n_iterations
+
+                    self.results.append(result)
+
+                # select a number of best configurations for the next loop;
+                # filter out early stops, if any
+                indices = np.argsort(val_losses)
+                T = [T[i] for i in indices if not early_stops[i]]
+                T = T[0:int(n_configs / self.eta)]
+
+        return self.results
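For orientation: with max_iter = 81 and eta = 3, the bracket schedule that run() iterates over is fixed, and can be reproduced from the two formulas for n and r above. A minimal sketch (illustration only, not part of the commit):

    from math import log, ceil

    max_iter, eta = 81, 3
    s_max = int(log(max_iter) / log(eta))  # 4
    B = (s_max + 1) * max_iter             # 405

    for s in reversed(range(s_max + 1)):
        n = int(ceil(B / max_iter / (s + 1) * eta ** s))  # initial configs
        r = max_iter * eta ** (-s)                        # initial epochs per config
        print(s, n, r)
    # prints: 4 81 1.0 / 3 34 3.0 / 2 15 9.0 / 1 8 27.0 / 0 5 81.0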
main.py (67 changed lines)
@@ -3,11 +3,12 @@ import argparse
 from keras.utils import np_utils
 
 import dataset
+import hyperband
 import models
 
 parser = argparse.ArgumentParser()
 
-# parser.add_argument("--modes", action="store", dest="modes", nargs="+")
+parser.add_argument("--modes", action="store", dest="modes", nargs="+")
 
 parser.add_argument("--train", action="store", dest="train_data",
                     default="data/full_dataset.csv.tar.bz2")
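Because --modes uses nargs="+", several modes can be stacked in a single invocation. A quick check of the parsing behavior (illustrative values):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--modes", action="store", dest="modes", nargs="+")
    print(parser.parse_args(["--modes", "train", "hyperband"]).modes)
    # ['train', 'hyperband']; note that args.modes is None when the flag is
    # omitted, so the dispatch in main() assumes --modes is always given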
@@ -24,9 +25,9 @@ parser.add_argument("--model", action="store", dest="model",
 # parser.add_argument("--pred", action="store", dest="pred",
 #                     default="")
 #
-# parser.add_argument("--type", action="store", dest="model_type",
-#                     default="simple_conv")
-#
+parser.add_argument("--type", action="store", dest="model_type",
+                    default="paul")
+
 parser.add_argument("--batch", action="store", dest="batch_size",
                     default=64, type=int)
 
@@ -79,13 +80,52 @@ args = parser.parse_args()
 # session = tf.Session(config=config)
 
 
+def main_hyperband():
+    char_dict = dataset.get_character_dict()
+    user_flow_df = dataset.get_user_flow_data(args.train_data)
+
+    params = {
+        # static params
+        "type": ["paul"],
+        "batch_size": [64],
+        "vocab_size": [len(char_dict) + 1],
+        "window_size": [10],
+        "domain_length": [40],
+        "flow_features": [3],
+        "input_length": [40],
+        # model params
+        "embedding_size": [16, 32, 64, 128, 256, 512],
+        "filter_embedding": [16, 32, 64, 128, 256, 512],
+        "kernel_embedding": [1, 3, 5, 7, 9],
+        "hidden_embedding": [16, 32, 64, 128, 256, 512],
+        "dropout": [0.5],
+        "domain_features": [16, 32, 64, 128, 256, 512],
+        "filter_main": [16, 32, 64, 128, 256, 512],
+        "kernels_main": [1, 3, 5, 7, 9],
+        "dense_main": [16, 32, 64, 128, 256, 512],
+    }
+    param = hyperband.sample_params(params)
+    print(param)
+
+    print("create training dataset")
+    domain_tr, flow_tr, client_tr, server_tr = dataset.create_dataset_from_flows(
+        user_flow_df, char_dict,
+        max_len=args.domain_length,
+        window_size=args.window)
+    client_tr = np_utils.to_categorical(client_tr, 2)
+    server_tr = np_utils.to_categorical(server_tr, 2)
+
+    hp = hyperband.Hyperband(params, [domain_tr, flow_tr], [client_tr, server_tr])
+    hp.run()
+
+
 def main_train():
     # parameter
     cnnDropout = 0.5
     cnnHiddenDims = 512
     kernel_size = 3
     filters = 128
-    network = models.pauls_networks
+    network = models.pauls_networks if args.model_type == "paul" else models.renes_networks
 
     char_dict = dataset.get_character_dict()
     user_flow_df = dataset.get_user_flow_data(args.train_data)
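Each sample_params call draws one entry per list, so the grid above spans a search space whose size is the product of the list lengths. A quick count, reusing the params dict from this hunk (sketch, not part of the commit):

    from functools import reduce

    space = reduce(lambda acc, values: acc * len(values), params.values(), 1)
    print(space)  # 6**6 * 5**2 = 1166400 candidate configurations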
@@ -94,6 +134,8 @@ def main_train():
     domain_tr, flow_tr, client_tr, server_tr = dataset.create_dataset_from_flows(
         user_flow_df, char_dict,
         max_len=args.domain_length, window_size=args.window)
+    client_tr = np_utils.to_categorical(client_tr, 2)
+    server_tr = np_utils.to_categorical(server_tr, 2)
 
     shared_cnn = network.get_embedding(len(char_dict) + 1, args.embedding, args.domain_length,
                                        args.hidden_char_dims, kernel_size, args.domain_embedding, 0.5)
@@ -105,11 +147,9 @@ def main_train():
     model.summary()
 
     model.compile(optimizer='adam',
-                  loss='binary_crossentropy',
+                  loss='categorical_crossentropy',
                   metrics=['accuracy'])
 
-    client_tr = np_utils.to_categorical(client_tr, 2)
-    server_tr = np_utils.to_categorical(server_tr, 2)
     model.fit([domain_tr, flow_tr],
               [client_tr, server_tr],
               batch_size=args.batch_size,
@@ -117,6 +157,8 @@ def main_train():
               shuffle=True,
               validation_split=0.2)
 
+    model.save(args.model)
+
 
 def main_test():
     char_dict = dataset.get_character_dict()
@@ -154,7 +196,16 @@ def main_score():
 
 
 def main():
-    main_train()
+    if "train" in args.modes:
+        main_train()
+    if "hyperband" in args.modes:
+        main_hyperband()
+    if "test" in args.modes:
+        main_test()
+    if "fancy" in args.modes:
+        main_visualization()
+    if "score" in args.modes:
+        main_score()
 
 
 if __name__ == "__main__":
@@ -2,6 +2,20 @@ import keras
 from keras.engine import Input, Model
 from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, Activation, TimeDistributed
 
+best_config = {
+    'domain_features': 32,
+    'drop_out': 0.5,
+    'embedding_size': 64,
+    'filter_main': 512,
+    'flow_features': 3,
+    'hidden_dims': 32,
+    'filter_embedding': 32,
+    'hidden_embedding': 32,
+    'kernel_embedding': 8,
+    'kernels_main': 8,
+    'input_length': 40
+}
+
+
 def get_embedding(vocab_size, embedding_size, input_length,
                   filters, kernel_size, hidden_dims, drop_out=0.5):
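The best_config keys mirror the hyperband search space (note the drop_out vs. dropout spelling mismatch). One plausible way to feed it into get_embedding; the argument pairing and the vocab_size value are assumptions, not part of the commit:

    # hypothetical wiring; vocab_size stands in for len(char_dict) + 1 from main.py
    embedding = get_embedding(vocab_size=40,
                              embedding_size=best_config['embedding_size'],
                              input_length=best_config['input_length'],
                              filters=best_config['filter_embedding'],
                              kernel_size=best_config['kernel_embedding'],
                              hidden_dims=best_config['hidden_embedding'],
                              drop_out=best_config['drop_out'])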