Removed ys (the discretized labels) from training data generation; create_dataset_from_lists no longer returns them
This commit is contained in:
parent
deac7f9e58
commit
8334e9a84f
@ -110,6 +110,7 @@ def create_dataset_from_flows(user_flow_df, char_dict, maxLen, threshold=3, wind
|
|||||||
def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
||||||
use_cisco_features=False, urlSIPDIct=dict(),
|
use_cisco_features=False, urlSIPDIct=dict(),
|
||||||
window_size=10):
|
window_size=10):
|
||||||
|
# TODO: check that the 'hits' and 'virusTotalHits' (vth) columns are used consistently
|
||||||
if 'hits' in dfs[0].keys():
|
if 'hits' in dfs[0].keys():
|
||||||
hitName = 'hits'
|
hitName = 'hits'
|
||||||
elif 'virusTotalHits' in dfs[0].keys():
|
elif 'virusTotalHits' in dfs[0].keys():
|
||||||
@ -120,7 +121,6 @@ def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
|||||||
if use_cisco_features:
|
if use_cisco_features:
|
||||||
numFeatures += numCiscoFeatures
|
numFeatures += numCiscoFeatures
|
||||||
Xs = []
|
Xs = []
|
||||||
ys = []
|
|
||||||
hits = []
|
hits = []
|
||||||
names = []
|
names = []
|
||||||
servers = []
|
servers = []
|
||||||
@ -141,12 +141,11 @@ def create_dataset_from_lists(domains, dfs, vocab, maxLen, threshold=3,
|
|||||||
Xs[ctr][i, :] = get_flow_features(dfs[i].iloc[j])
|
Xs[ctr][i, :] = get_flow_features(dfs[i].iloc[j])
|
||||||
ctr += 1
|
ctr += 1
|
||||||
|
|
||||||
ys.append(discretize_label(dfs[i][hitName], threshold))
|
|
||||||
hits.append(np.max(dfs[i][hitName]))
|
hits.append(np.max(dfs[i][hitName]))
|
||||||
names.append(np.unique(dfs[i]['user_hash']))
|
names.append(np.unique(dfs[i]['user_hash']))
|
||||||
servers.append(np.max(dfs[i]['serverLabel']))
|
servers.append(np.max(dfs[i]['serverLabel']))
|
||||||
trusted_hits.append(np.max(dfs[i]['trustedHits']))
|
trusted_hits.append(np.max(dfs[i]['trustedHits']))
|
||||||
return Xs, np.array(ys), np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
|
return Xs, np.array(hits), np.array(names), np.array(servers), np.array(trusted_hits)
|
||||||
|
|
||||||
|
|
||||||
def discretize_label(values, threshold):
|
def discretize_label(values, threshold):
|
||||||
|
Loading…
Reference in New Issue
Block a user