# utils.py -- argument parsing and data-loading helpers.
# NOTE(review): GitHub page chrome and rendered line-number gutter removed from
# this scraped copy; code content below is unchanged.
# NOTE(review): the imports `from operator import index`, `from random import
# random`, and `from tkinter import ON` appear unused -- confirm before removing.
import argparse
import multiprocessing
from collections import defaultdict
from operator import index
from random import random
from tkinter import ON
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from data_test import *
#Parameter setting
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--epoch', type=int, default=100,
help='Number of epoch. Default is 100.')
parser.add_argument('--batch-size', type=int, default=64,
help='Number of batch_size. Default is 64.')
parser.add_argument('--eval-type', type=str, default='all',
help='The edge type(s) for evaluation.')
parser.add_argument('--schema', type=str, default=None,
help='The metapath schema (e.g., U-I-U,I-U-I).')
parser.add_argument('--dimensions', type=int, default=8,
help='Number of node dimensions. Default is 200.')
parser.add_argument('--edge-dim', type=int, default=4,
help='Number of edge embedding dimensions. Default is 10.')
parser.add_argument('--att-dim', type=int, default=4,
help='Number of attention dimensions. Default is 20.')
parser.add_argument('--window-size', type=int, default=2,
help='Context size for optimization. Default is 5.')
parser.add_argument('--negative-samples', type=int, default=5,
help='Negative samples for optimization. Default is 5.')
return parser.parse_args()
#Load training set data and return instruction entity set and edge set
def load_train_data():
print('loading training data!!!')
dict_edge = get_edge()
all_nodes = list()
edge_data_by_type = dict()
for i in range(len(dict_edge)):
if dict_edge[i][1] not in edge_data_by_type:
edge_data_by_type[dict_edge[i][1]] = list()
x , y = dict_edge[i][0] , dict_edge[i][2]
edge_data_by_type[dict_edge[i][1]].append((x , y))
all_nodes.append(x)
all_nodes.append(y)
all_nodes = list(set(all_nodes)) #Remove duplicate nodes
return all_nodes, edge_data_by_type
#Get Edge Set
def load_edge_data():
return get_edge()
#Load node attributes and use error rate as label
def load_feature_data():
print("load node features!!!")
features = get_features()
all_feature = []
for key , value in features.items():
all_feature.append(np.array(value))
all_feature = np.array(all_feature)
lc = LabelEncoder() #Encoder
for i in range(4,len(all_feature[0])-1):
all_feature[:,i] = lc.fit_transform(all_feature[:,i])
labels = all_feature[:, -1] #label
return all_feature[:, :-1] , labels
#Load basic block information and preprocess
def load_BB_info():
dict_BB , edge = get_BB_info()
nodes = list(dict_BB.keys()) #Basic block label
features = [] #Corresponds the node number to the position of the attribute sequence in the table
for i in range(len(nodes)):
features.append(dict_BB[str(i)])
return nodes, features, edge
if __name__ == '__main__':
nodes, edge = load_train_data()
load_BB_info()