removed ys from training data generation
This commit is contained in:
parent
deac7f9e58
commit
8334e9a84f
@ -110,6 +110,7 @@ def create_dataset_from_flows(user_flow_df, char_dict, maxLen, threshold=3, wind
|
||||
def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
||||
use_cisco_features=False, urlSIPDIct=dict(),
|
||||
window_size=10):
|
||||
# TODO: check for hits vs vth consistency
|
||||
if 'hits' in dfs[0].keys():
|
||||
hitName = 'hits'
|
||||
elif 'virusTotalHits' in dfs[0].keys():
|
||||
@ -120,7 +121,6 @@ def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
||||
if use_cisco_features:
|
||||
numFeatures += numCiscoFeatures
|
||||
Xs = []
|
||||
ys = []
|
||||
hits = []
|
||||
names = []
|
||||
servers = []
|
||||
@ -141,12 +141,11 @@ def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
||||
Xs[ctr][i, :] = get_flow_features(dfs[i].iloc[j])
|
||||
ctr += 1
|
||||
|
||||
ys.append(discretize_label(dfs[i][hitName], threshold))
|
||||
hits.append(np.max(dfs[i][hitName]))
|
||||
names.append(np.unique(dfs[i]['user_hash']))
|
||||
servers.append(np.max(dfs[i]['serverLabel']))
|
||||
trusted_hits.append(np.max(dfs[i]['trustedHits']))
|
||||
return Xs, np.array(ys), np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
|
||||
return Xs, np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
|
||||
|
||||
|
||||
def discretize_label(values, threshold):
|
||||
|
Loading…
Reference in New Issue
Block a user