-
F401
'argparse' imported but unused
1 import argparse
2 # import utils_parsing as parse
3 import os
4
5 import random
-
F401
'copy' imported but unused
6 import copy
-
F401
'json' imported but unused
7 import json
8
9 import torch
10 import numpy as np
11
-
F401
'torch_geometric.data.DataLoader' imported but unused
12 from torch_geometric.data import DataLoader
-
F401
'torch_geometric.data.Data' imported but unused
13 from torch_geometric.data import Data
14
15 from .utils import prepare_dataset, get_custom_edge_list
-
F401
'.utils_data_prep.separate_data' imported but unused
-
F401
'.utils_data_prep.separate_data_given_split' imported but unused
16 from .utils_data_prep import separate_data, separate_data_given_split
-
F401
'ogb.graphproppred.PygGraphPropPredDataset' imported but unused
17 from ogb.graphproppred import Evaluator, PygGraphPropPredDataset
18 from .utils_ids import subgraph_counts2ids
-
E501
Line too long (107 > 79 characters)
19 from .utils_graph_processing import subgraph_isomorphism_edge_counts, subgraph_isomorphism_vertex_counts, \
-
E501
Line too long (83 > 79 characters)
20 induced_edge_automorphism_orbits, edge_automorphism_orbits, automorphism_orbits
21 from .utils_encoding import encode
22
23
class GSN:
    """Configure and run the GSN substructure-count preprocessing step.

    The constructor stores a fixed set of hyper-parameters on the instance,
    selects the counting / automorphism helpers that match them, builds the
    substructure edge lists for the supported ``id_type`` families and seeds
    every random number generator.  :meth:`preprocess` then prepares the
    dataset and encodes the substructure identifiers and degrees.

    Args:
        dataset_name: Name of the specific problem (stored as
            ``self.dataset_name``).
        dataset_group: Class of the dataset (stored as ``self.dataset``).
        induced: Forwarded to ``prepare_dataset`` as the ``induced`` option
            (graphlets vs motifs).
        id_type: Substructure family, e.g. ``'cycle_graph'``.
        k: Size of the substructures that are used; e.g. ``k=3`` means three
            nodes.  For the families in ``_RANGE_ID_TYPES`` it is the largest
            size that is generated.

    Raises:
        NotImplementedError: If ``dataset_group`` is not ``'ogb'`` and
            ``dataset_name`` is not ``'MNIST'``.
    """

    # Families for which one edge list per size in [k_min, k] is generated.
    _RANGE_ID_TYPES = (
        'cycle_graph',
        'path_graph',
        'complete_graph',
        'binomial_tree',
        'star_graph',
        'nonisomorphic_trees',
    )

    def __init__(self, dataset_name, dataset_group, induced, id_type, k):
        super().__init__()

        # Seeds, fixed to ensure reproducibility.
        self.seed = 0
        self.split_seed = 0
        self.np_seed = 0

        # Set multiprocessing to True to do the precomputation in parallel.
        self.multiprocessing = False
        self.num_processes = 64

        # Data loader parameters.
        self.num_workers = 0
        self.num_threads = 1

        # Dataset selection:
        #   - dataset states the class (e.g. bioinformatics or social);
        #   - dataset_name is the specific problem itself.
        self.dataset = dataset_group
        self.dataset_name = dataset_name

        # Set degree_as_tag to True to use the degree as node features;
        # set retain_features to True to keep the existing features as well.
        # Both are expanded to one flag per layer further below.
        self.degree_as_tag = False
        self.retain_features = False

        # Used only for ogb to reproduce the different configurations,
        # i.e. additional features ('full') or not ('simple').
        self.features_scope = 'full'

        # Substructure-related parameters:
        #   - id_type: substructure family
        #   - induced: graphlets vs motifs
        #   - edge_automorphism: induced edge automorphism or line graph
        #     edge automorphism (slightly larger group than the induced one)
        #   - k: size of the substructures that are used
        #   - id_scope: local vs global --> GSN-e vs GSN-v
        self.id_type = id_type
        self.induced = induced
        self.edge_automorphism = 'induced'
        self.k = k
        self.id_scope = 'local'
        self.custom_edge_list = None
        self.directed = False
        self.directed_orbits = False

        # Encoding args: different ways to encode discrete data.
        self.id_encoding = 'one_hot_unique'
        self.degree_encoding = 'one_hot_unique'

        # Binning and minmax encoding parameters.
        # NB: not used in the experimental evaluation.
        self.id_bins = None
        self.degree_bins = None
        self.id_strategy = 'uniform'
        self.degree_strategy = 'uniform'
        self.id_range = None
        self.degree_range = None

        # Number of layers; drives the length of the per-layer flag lists.
        self.num_layers = 2

        # Architecture variations:
        #   - msg_kind: gin (extends gin with structural identifiers),
        #     general (general formulation with MLPs),
        #     ogb (extends the ogb architecture with structural identifiers)
        #   - inject*: passes the relevant variable to deeper layers akin to
        #     skip connections.  If False, the variable is used only as
        #     input to the first layer.
        self.msg_kind = 'general'
        self.inject_ids = False
        self.inject_degrees = False
        self.inject_edge_features = True

        self.device_idx = 0

        # Encoder input dimensions; filled in by preprocess().
        self.d_in_node_encoder = None
        self.d_in_edge_encoder = None

        # ----------------------------------- argument processing

        self.extract_id_fn = subgraph_counts2ids

        is_local = self.id_scope == 'local'

        # Choose the function that computes the automorphism group and the
        # orbits.  The edge variants only apply to the local scope.
        if self.edge_automorphism == 'induced':
            local_automorphism_fn = induced_edge_automorphism_orbits
        elif self.edge_automorphism == 'line_graph':
            local_automorphism_fn = edge_automorphism_orbits
        else:
            raise NotImplementedError(
                "Edge automorphism {} is not currently supported.".format(
                    self.edge_automorphism))
        self.automorphism_fn = (
            local_automorphism_fn if is_local else automorphism_orbits)

        # Choose the function that computes the subgraph isomorphisms.
        self.count_fn = (
            subgraph_isomorphism_edge_counts if is_local
            else subgraph_isomorphism_vertex_counts)

        # Choose the substructures.  Only the families listed in
        # _RANGE_ID_TYPES are handled here; for any other id_type the
        # custom edge list is left as None.
        if self.id_type in self._RANGE_ID_TYPES:
            k_max = self.k
            k_min = 2 if self.id_type == 'star_graph' else 3
            self.custom_edge_list = get_custom_edge_list(
                list(range(k_min, k_max + 1)), self.id_type)

        # Define if the degree is going to be used as a feature and when
        # (for each layer or only at initialization).
        if self.inject_degrees:
            self.degree_as_tag = [self.degree_as_tag] * self.num_layers
        else:
            self.degree_as_tag = (
                [self.degree_as_tag] + [False] * (self.num_layers - 1))

        # Define if existing features are going to be retained when the
        # degree is used as a feature.
        self.retain_features = (
            [self.retain_features] + [True] * (self.num_layers - 1))

        # The evaluator is kept on the instance instead of being discarded.
        # MNIST uses the 'ogbg-ppa' evaluator.
        if self.dataset == 'ogb':
            self.evaluator = Evaluator(self.dataset_name)
        elif self.dataset_name == 'MNIST':
            self.evaluator = Evaluator('ogbg-ppa')
        else:
            raise NotImplementedError(
                "Dataset {} ({}) is not currently supported.".format(
                    self.dataset_name, self.dataset))

        # ----------------------------------- infrastructure

        torch.manual_seed(self.seed)
        torch.cuda.manual_seed(self.seed)
        torch.cuda.manual_seed_all(self.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(self.np_seed)
        os.environ['PYTHONHASHSEED'] = str(self.seed)
        random.seed(self.seed)
        print('[info] Setting all random seeds {}'.format(self.seed))

        torch.set_num_threads(self.num_threads)
        # Kept on the instance instead of being discarded.
        self.device = torch.device(
            "cuda:" + str(self.device_idx)
            if torch.cuda.is_available() else "cpu")

    @staticmethod
    def _degree_feature_enabled(degree_as_tag):
        """Return True if the degree is used as a feature in any layer.

        ``degree_as_tag`` is a per-layer list after ``__init__``; a
        non-empty list is always truthy, so it must not be tested directly.
        A plain boolean is accepted as well.
        """
        if isinstance(degree_as_tag, (list, tuple)):
            return any(degree_as_tag)
        return bool(degree_as_tag)

    def _encoder_input_dims(self, first_graph, root_path):
        """Return ``(d_in_node_encoder, d_in_edge_encoder)`` as two lists.

        The dimensions are read from ``first_graph``, except for the
        chemical ZINC dataset, where they are loaded from
        ``<root_path>/processed/num_feature_types.pt``.
        """
        if first_graph.x.dim() == 1:
            num_features = 1
        else:
            num_features = first_graph.num_features

        if not hasattr(first_graph, 'edge_features'):
            num_edge_features = None
        elif first_graph.edge_features.dim() == 1:
            num_edge_features = 1
        else:
            num_edge_features = first_graph.edge_features.shape[1]

        if self.dataset == 'chemical' and self.dataset_name == 'ZINC':
            d_in_node, d_in_edge = torch.load(
                os.path.join(root_path, 'processed', 'num_feature_types.pt'))
            return [d_in_node], [d_in_edge]
        return [num_features], [num_edge_features]

    def preprocess(self, root_path):
        """Prepare the dataset and encode substructure ids and degrees.

        Args:
            root_path: Dataset root folder, forwarded to ``prepare_dataset``.

        Returns:
            The tuple ``(graphs_ptg, encoder_ids, d_id, d_degree)`` taken
            from the result of ``encode``.

        Side effects:
            Sets ``self.d_in_node_encoder`` and ``self.d_in_edge_encoder``.
        """
        # ----------------------------------- datasets: prepare and
        # preprocess (count or load subgraph counts)
        subgraph_params = {
            'induced': self.induced,
            'edge_list': self.custom_edge_list,
            'directed': self.directed,
            'directed_orbits': self.directed_orbits,
        }
        graphs_ptg, _num_classes, _orbit_partition_sizes = prepare_dataset(
            root_path,
            self.dataset,
            self.dataset_name,
            self.id_scope,
            self.id_type,
            self.k,
            self.extract_id_fn,
            self.count_fn,
            self.automorphism_fn,
            self.multiprocessing,
            self.num_processes,
            **subgraph_params)

        # Node and edge feature dimensions.  They are not part of the
        # return value, so they are stored on the instance.
        self.d_in_node_encoder, self.d_in_edge_encoder = (
            self._encoder_input_dims(graphs_ptg[0], root_path))

        # ----------------------------------- encode ids and degrees
        # (and possibly edge features)

        # degree_as_tag holds one flag per layer, so test its contents
        # rather than the (always truthy) list itself.
        if self._degree_feature_enabled(self.degree_as_tag):
            degree_encoding = self.degree_encoding
        else:
            degree_encoding = None
        id_encoding = self.id_encoding if self.id_encoding != 'None' else None
        encoding_parameters = {
            'ids': {
                'bins': self.id_bins,
                'strategy': self.id_strategy,
                'range': self.id_range,
            },
            'degree': {
                'bins': self.degree_bins,
                'strategy': self.degree_strategy,
                'range': self.degree_range,
            },
        }

        print("Encoding substructure counts and degree features... ", end='')
        graphs_ptg, encoder_ids, d_id, _encoder_degrees, d_degree = encode(
            graphs_ptg,
            id_encoding,
            degree_encoding,
            **encoding_parameters)

        return graphs_ptg, encoder_ids, d_id, d_degree