Removed ys (the discretized labels) from training data generation

This commit is contained in:
René Knaebel 2017-06-30 17:42:18 +02:00
parent deac7f9e58
commit 8334e9a84f

View File

@ -110,6 +110,7 @@ def create_dataset_from_flows(user_flow_df, char_dict, maxLen, threshold=3, wind
def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
use_cisco_features=False, urlSIPDIct=dict(),
window_size=10):
# TODO: check for hits vs vth consistency
if 'hits' in dfs[0].keys():
hitName = 'hits'
elif 'virusTotalHits' in dfs[0].keys():
@ -120,7 +121,6 @@ def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
if use_cisco_features:
numFeatures += numCiscoFeatures
Xs = []
ys = []
hits = []
names = []
servers = []
@ -141,12 +141,11 @@ def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
Xs[ctr][i, :] = get_flow_features(dfs[i].iloc[j])
ctr += 1
ys.append(discretize_label(dfs[i][hitName], threshold))
hits.append(np.max(dfs[i][hitName]))
names.append(np.unique(dfs[i]['user_hash']))
servers.append(np.max(dfs[i]['serverLabel']))
trusted_hits.append(np.max(dfs[i]['trustedHits']))
return Xs, np.array(ys), np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
return Xs, np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
def discretize_label(values, threshold):