I'm trying to download the Cats vs. Dogs dataset with TensorFlow Datasets, but am unable to do so. The code I'm using is:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.optimizers import RMSprop
import certifi
import urllib3
urllib3.disable_warnings()
# Download the dataset
setattr(tfds.image.cats_vs_dogs, '_URL',"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip")
dataset = tfds.load('cats_vs_dogs', split=tfds.Split.TRAIN, data_dir='data/')
I found this approach in the answer to "Error in importing Cats-vs-Dogs dataset in Google Colab". The only change I needed to make was to replace image_classification with image, since tfds apparently no longer exposes an image_classification module. I am now getting a checksum error with the following traceback (after the trace I've also included how I plan to verify the archive's SHA-256 myself):
Downloading and preparing dataset cats_vs_dogs (786.68 MiB) to data/cats_vs_dogs/2.0.1...
Dl Completed...: 100% 1/1 [00:08<00:00, 8.26s/url]
Dl Size...: 100% 786/786 [00:08<00:00, 98.21 MiB/s]
---------------------------------------------------------------------------
NonMatchingChecksumError Traceback (most recent call last)
Cell In[14], line 3
1 # Download the dataset
2 setattr(tfds.image.cats_vs_dogs, '_URL',"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip")
----> 3 dataset = tfds.load('cats_vs_dogs', split=tfds.Split.TRAIN, data_dir='data/')
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/api_utils.py:52, in disallow_positional_args.<locals>.disallow_positional_args_dec(fn, instance, args, kwargs)
50 _check_no_positional(fn, args, ismethod, allowed=allowed)
51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/registered.py:300, in load(name, split, data_dir, batch_size, in_memory, shuffle_files, download, as_supervised, decoders, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
298 if download:
299 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
--> 300 dbuilder.download_and_prepare(**download_and_prepare_kwargs)
302 if as_dataset_kwargs is None:
303 as_dataset_kwargs = {}
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/api_utils.py:52, in disallow_positional_args.<locals>.disallow_positional_args_dec(fn, instance, args, kwargs)
50 _check_no_positional(fn, args, ismethod, allowed=allowed)
51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:285, in DatasetBuilder.download_and_prepare(self, download_dir, download_config)
281 with file_format_adapter.incomplete_dir(self._data_dir) as tmp_data_dir:
282 # Temporarily assign _data_dir to tmp_data_dir to avoid having to forward
283 # it to every sub function.
284 with utils.temporary_assignment(self, "_data_dir", tmp_data_dir):
--> 285 self._download_and_prepare(
286 dl_manager=dl_manager,
287 download_config=download_config)
289 # NOTE: If modifying the lines below to put additional information in
290 # DatasetInfo, you'll likely also want to update
291 # DatasetInfo.read_from_directory to possibly restore these attributes
292 # when reading from package data.
293
294 # Update the DatasetInfo metadata by computing statistics from the data.
295 if (download_config.compute_stats == download.ComputeStatsMode.SKIP or
296 download_config.compute_stats == download.ComputeStatsMode.AUTO and
297 bool(self.info.splits.total_num_examples)
298 ):
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:946, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, download_config)
944 def _download_and_prepare(self, dl_manager, download_config):
945 # Extract max_examples_per_split and forward it to _prepare_split
--> 946 super(GeneratorBasedBuilder, self)._download_and_prepare(
947 dl_manager=dl_manager,
948 max_examples_per_split=download_config.max_examples_per_split,
949 )
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:804, in FileAdapterBuilder._download_and_prepare(self, dl_manager, **prepare_split_kwargs)
802 # Generating data for all splits
803 split_dict = splits_lib.SplitDict()
--> 804 for split_generator in self._split_generators(dl_manager):
805 if splits_lib.Split.ALL == split_generator.split_info.name:
806 raise ValueError(
807 "tfds.Split.ALL is a special split keyword corresponding to the "
808 "union of all splits, so cannot be used as key in "
809 "._split_generator()."
810 )
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/image/cats_vs_dogs.py:81, in CatsVsDogs._split_generators(self, dl_manager)
80 def _split_generators(self, dl_manager):
---> 81 path = dl_manager.download(_URL)
83 # There is no predefined train/val/test split for this dataset.
84 return [
85 tfds.core.SplitGenerator(
86 name=tfds.Split.TRAIN,
(...)
90 }),
91 ]
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:301, in DownloadManager.download(self, url_or_urls)
299 # Add progress bar to follow the download state
300 with self._downloader.tqdm():
--> 301 return _map_promise(self._download, url_or_urls)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:395, in _map_promise(map_fn, all_inputs)
393 """Map the function into each element and resolve the promise."""
394 all_promises = utils.map_nested(map_fn, all_inputs) # Apply the function
--> 395 res = utils.map_nested(_wait_on_promise, all_promises)
396 return res
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/utils/py_utils.py:143, in map_nested(function, data_struct, dict_only, map_tuple)
141 return tuple(mapped)
142 # Singleton
--> 143 return function(data_struct)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:379, in _wait_on_promise(p)
378 def _wait_on_promise(p):
--> 379 return p.get()
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:512, in Promise.get(self, timeout)
510 target = self._target()
511 self._wait(timeout or DEFAULT_TIMEOUT)
--> 512 return self._target_settled_value(_raise=True)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:516, in Promise._target_settled_value(self, _raise)
514 def _target_settled_value(self, _raise=False):
515 # type: (bool) -> Any
--> 516 return self._target()._settled_value(_raise)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:226, in Promise._settled_value(self, _raise)
224 if _raise:
225 raise_val = self._fulfillment_handler0
--> 226 reraise(type(raise_val), raise_val, self._traceback)
227 return self._fulfillment_handler0
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/six.py:719, in reraise(tp, value, tb)
717 if value.__traceback__ is not tb:
718 raise value.with_traceback(tb)
--> 719 raise value
720 finally:
721 value = None
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:87, in try_catch(handler, *args, **kwargs)
84 def try_catch(handler, *args, **kwargs):
85 # type: (Callable, Any, Any) -> Union[Tuple[Any, None], Tuple[None, Tuple[Exception, Optional[TracebackType]]]]
86 try:
---> 87 return (handler(*args, **kwargs), None)
88 except Exception as e:
89 tb = exc_info()[2]
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:245, in DownloadManager._download.<locals>.callback(val)
243 def callback(val):
244 checksum, dl_size = val
--> 245 return self._handle_download_result(
246 resource, download_dir_path, checksum, dl_size)
File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:207, in DownloadManager._handle_download_result(self, resource, tmp_dir_path, sha256, dl_size)
205 self._record_sizes_checksums()
206 elif (dl_size, sha256) != self._sizes_checksums.get(resource.url, None):
--> 207 raise NonMatchingChecksumError(resource.url, tmp_path)
208 download_path = self._get_final_dl_path(resource.url, sha256)
209 resource_lib.write_info_file(resource, download_path, self._dataset_name,
210 original_fname)
NonMatchingChecksumError: Artifact https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip, downloaded to data/downloads/down.micr.com_down_3_E_1_3E1C-ECDB-4869-83s65y3LdGwg1Nl3MtI_KASK_7DbbKUE5i78aSRiYcW5Y.zip.tmp.d29547b4270b45f29828b6cf78a2474a/kagglecatsanddogs_5340.zip, has wrong checksum.
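For reference, this is how I intend to double-check the SHA-256 of the downloaded archive myself, so I can compare it against whatever tfds has recorded. It's only a sketch: the path is a placeholder, since the temporary directory named in the error message may already have been cleaned up.

import hashlib

# Placeholder path: wherever the zip actually ends up under data/downloads/
archive_path = "data/downloads/kagglecatsanddogs_5340.zip"

sha256 = hashlib.sha256()
with open(archive_path, "rb") as f:
    # Hash in 1 MiB chunks to avoid loading the ~786 MiB file into memory
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)
print(sha256.hexdigest())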
What can/should I do from here?
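One workaround I've seen suggested for NonMatchingChecksumError is to let tfds re-register the checksum of the newly downloaded archive via DownloadConfig. A rough sketch of what I have in mind is below (download_and_prepare_kwargs shows up in the tfds.load signature in my traceback, and register_checksums is a documented DownloadConfig option, but I'm not certain this is the right fix for my tfds version or whether it would just hide a corrupted download):

import tensorflow_datasets as tfds

# Ask tfds to record the checksum of whatever it downloads instead of
# failing against the stale value shipped with the package.
config = tfds.download.DownloadConfig(register_checksums=True)
dataset = tfds.load(
    'cats_vs_dogs',
    split=tfds.Split.TRAIN,
    data_dir='data/',
    download_and_prepare_kwargs={'download_config': config},
)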