⬅ download_datasets.py source

1 import argparse
2 import gds
  • F811 Redefinition of unused 'argparse' from line 1
3 import argparse
4  
  • F811 Redefinition of unused 'gds' from line 2
5 import gds
6  
7  
def main(argv=None):
    """
    Download the latest version of each requested dataset.

    Parses ``--root_dir`` (required) and ``--datasets`` (optional), checks
    every requested name against ``gds.supported_datasets``, and then calls
    ``gds.get_dataset(..., download=True)`` once per dataset. All names are
    validated before the first ``gds.get_dataset`` call, so an unrecognized
    name aborts the run before any download is requested.

    NOTE(review): the original docstring says datasets are only downloaded
    "if they do not already exist"; that check is presumably done inside
    ``gds.get_dataset`` and is not visible here -- confirm.

    Args:
        argv: Optional list of command-line argument strings to parse
            instead of ``sys.argv[1:]``. The default ``None`` keeps the
            original behaviour of reading the process arguments, and lets
            callers invoke the script programmatically.

    Raises:
        ValueError: If a requested dataset name is not in
            ``gds.supported_datasets``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--root_dir',
        required=True,
        help=(
            'The directory where [dataset]/data can be found '
            '(or should be downloaded to, if it does not exist).'
        ),
    )
    parser.add_argument(
        '--datasets',
        nargs='*',
        default=None,
        help=(
            'Specify a space-separated list of dataset names to download. '
            'If left unspecified, the script will download all of the '
            'official benchmark datasets. '
            f'Available choices are {gds.supported_datasets}.'
        ),
    )
    # parse_args(None) reads sys.argv[1:], so existing callers see no change.
    config = parser.parse_args(argv)

    # Only a completely omitted ``--datasets`` flag (value None) falls back
    # to the benchmark list; an explicit list is used exactly as given.
    requested = config.datasets
    if requested is None:
        requested = gds.benchmark_datasets

    # Validate every name up front so a typo fails before any download.
    for name in requested:
        if name not in gds.supported_datasets:
            raise ValueError(
                f'{name} not recognized; '
                f'must be one of {gds.supported_datasets}.'
            )

    print(f'Downloading the following datasets: {requested}')
    for name in requested:
        print(f'=== {name} ===')
        gds.get_dataset(
            dataset=name,
            root_dir=config.root_dir,
            download=True,
        )
34  
35  
# Script entry point: run the downloader only when this file is executed
# directly, so that merely importing the module does not call main().
if __name__ == '__main__':
    main()