import torch
import sys
from sklearn.preprocessing import (KBinsDiscretizer, OneHotEncoder,
                                    MinMaxScaler, StandardScaler)
import numpy as np


def encode(graphs, id_encoding, degree_encoding=None, **kwargs):
    '''
    Encodes categorical variables such as structural identifiers
    and degree features.
    '''
    encoder_ids, d_id = None, [1]*graphs[0].identifiers.shape[1]
    if id_encoding is not None:
        id_encoding_fn = getattr(sys.modules[__name__], id_encoding)
        ids = [graph.identifiers for graph in graphs]
        encoder_ids = id_encoding_fn(ids, **(kwargs['ids']))
        encoded_ids = encoder_ids.fit(ids)
        d_id = encoder_ids.d

    encoder_degrees, d_degree = None, []
    if degree_encoding is not None:
        degree_encoding_fn = getattr(sys.modules[__name__], degree_encoding)
        degrees = [graph.degrees.unsqueeze(1) for graph in graphs]
        encoder_degrees = degree_encoding_fn(degrees, **(kwargs['degree']))
        encoded_degrees = encoder_degrees.fit(degrees)
        d_degree = encoder_degrees.d

    for g, graph in enumerate(graphs):
        if id_encoding is not None:
            setattr(graph, 'identifiers', encoded_ids[g])
        if degree_encoding is not None:
            setattr(graph, 'degrees', encoded_degrees[g])

    return graphs, encoder_ids, d_id, encoder_degrees, d_degree
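

# Hypothetical usage sketch (not from the original source): it assumes
# each element of `graphs` exposes an `identifiers` tensor of shape
# [n_nodes, k] and a 1-D `degrees` tensor, as accessed above, and that
# the 'ids' / 'degree' kwargs entries are forwarded to the encoder
# constructors:
#
#     graphs, encoder_ids, d_id, encoder_degrees, d_degree = encode(
#         graphs,
#         id_encoding='one_hot_unique',
#         degree_encoding='one_hot_max',
#         ids={},
#         degree={})
#
# d_id and d_degree then give, per column, the number of categories
# produced by the chosen encoders.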
            

class one_hot_unique:

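    # Hypothetical illustration (not in the original): for a single
    # identifier column with values [3, 7, 3], np.unique(...,
    # return_inverse=True) yields uniques = [3, 7] and corrs = [0, 1, 0],
    # so d = [2] is recorded and fit() relabels that column to the
    # contiguous indices [0, 1, 0] for downstream one-hot / embedding
    # lookups.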
    def __init__(self, tensor_list, **kwargs):
        tensor_list = torch.cat(tensor_list, 0)
        self.d = list()
        self.corrs = dict()
        for col in range(tensor_list.shape[1]):
            uniques, corrs = np.unique(tensor_list[:, col],
                                       return_inverse=True, axis=0)
            self.d.append(len(uniques))
            self.corrs[col] = corrs

    def fit(self, tensor_list):
        pointer = 0
        encoded_tensors = list()
        for tensor in tensor_list:
            n = tensor.shape[0]
            encoded = None  # rebuilt column-by-column below
            for col in range(tensor.shape[1]):
                translated = torch.LongTensor(
                    self.corrs[col][pointer:pointer+n]).unsqueeze(1)
                encoded = (torch.cat((encoded, translated), 1)
                           if col > 0 else translated)
            encoded_tensors.append(encoded)
            pointer += n
        return encoded_tensors


class one_hot_max:

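    # Hypothetical illustration (not in the original): this encoder
    # assumes each column already holds contiguous integer codes starting
    # at 0, so a degree column whose largest value is 5 yields d = [6];
    # fit() is the identity because no relabelling is needed.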
    def __init__(self, tensor_list, **kwargs):
        tensor_list = torch.cat(tensor_list, 0)
        self.d = [int(tensor_list[:, i].max() + 1)
                  for i in range(tensor_list.shape[1])]

    def fit(self, tensor_list):
        return tensor_list


# NB: this encoding scheme has been implemented, but never tested in
# experiments: use at your own risk.
'''
            
class minmax:

    def __init__(self, tensor_list, **kwargs):

        range_scaler = ([0.0, 1.0] if kwargs['range'] is None
                        else kwargs['range'])
        self.encoder = MinMaxScaler(feature_range=range_scaler)
        self.d = [1 for i in range(tensor_list[0].shape[1])]

    def fit(self, tensor_list):

        catted = torch.cat(tensor_list, 0).cpu().float().numpy()
        self.encoder.fit(catted)
        translated = self.encoder.transform(catted)

        pointer = 0
        encoded_tensors = list()
        for tensor in tensor_list:
            n = tensor.shape[0]
            encoded = torch.FloatTensor(translated[pointer:pointer+n, :])
            encoded_tensors.append(encoded)
            pointer += n

        return encoded_tensors
'''
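
# If the minmax scheme above were re-enabled, its constructor would
# expect a 'range' entry in the corresponding kwargs group, e.g.
# (hypothetical) ids={'range': [0.0, 1.0]}, or ids={'range': None} to
# fall back to the default [0.0, 1.0] feature range of MinMaxScaler.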
            
            
               
               
# NB: this encoding scheme has been implemented, but never tested in
# experiments: use at your own risk.
'''
            
class binning:

    def __init__(self, tensor_list, **kwargs):

        self.n_bins = kwargs['bins'][0]
        self.strategy = kwargs['strategy']

    def fit(self, tensor_list):

        catted = torch.cat(tensor_list, 0)
        translated = None
        d = []
        for col in range(catted.shape[1]):
            tensor_column = catted[:, col].unsqueeze(1).cpu().numpy()
            print(col, np.unique(tensor_column))
#             B = min([self.n_bins[col], len(np.unique(tensor_column))])
            B = min([self.n_bins, len(np.unique(tensor_column))])
            if B == 1:
                result = torch.ones(tensor_column.shape)
            else:
                encoder = KBinsDiscretizer(n_bins=B, encode='ordinal',
                                           strategy=self.strategy)
                d.append(encoder.n_bins)
                encoder.fit(tensor_column)
                result = encoder.transform(tensor_column)
                result = torch.LongTensor(result)
            translated = (result if col == 0
                          else torch.cat((translated, result), 1))

        pointer = 0
        encoded_tensors = list()
        for tensor in tensor_list:
            n = tensor.shape[0]
            encoded_tensors.append(translated[pointer:pointer+n])
            pointer += n

        self.d = d
        return encoded_tensors
'''
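
# If the binning scheme above were re-enabled, its constructor would
# expect, in the corresponding kwargs group, a 'bins' entry (a list whose
# first element gives the number of bins) and a 'strategy' entry (a
# KBinsDiscretizer strategy such as 'uniform', 'quantile' or 'kmeans'),
# e.g. (hypothetical) degree={'bins': [10], 'strategy': 'uniform'}.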