0

I'm trying to download the Cats vs. Dogs dataset using TensorFlow Datasets (tfds), but am unable to do so. The code I'm using is:

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.optimizers import RMSprop
import certifi
import urllib3
urllib3.disable_warnings()

# Download the dataset
setattr(tfds.image.cats_vs_dogs, '_URL',"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip")
dataset = tfds.load('cats_vs_dogs', split=tfds.Split.TRAIN, data_dir='data/')

I found this code in the question "Error in importing Cats-vs-Dogs dataset in Google Colab". The only change I needed to make was to replace image_classification with image, since tfds apparently doesn't have an image_classification module any longer. I am now getting a checksum error with the following traceback:

    Downloading and preparing dataset cats_vs_dogs (786.68 MiB) to data/cats_vs_dogs/2.0.1...

Dl Completed...: 100%
1/1 [00:08<00:00, 8.26s/ url]
Dl Size...: 100%
786/786 [00:08<00:00, 98.21 MiB/s]

---------------------------------------------------------------------------
NonMatchingChecksumError                  Traceback (most recent call last)
Cell In[14], line 3
      1 # Download the dataset
      2 setattr(tfds.image.cats_vs_dogs, '_URL',"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip")
----> 3 dataset = tfds.load('cats_vs_dogs', split=tfds.Split.TRAIN, data_dir='data/')

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/api_utils.py:52, in disallow_positional_args.<locals>.disallow_positional_args_dec(fn, instance, args, kwargs)
     50 _check_no_positional(fn, args, ismethod, allowed=allowed)
     51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/registered.py:300, in load(name, split, data_dir, batch_size, in_memory, shuffle_files, download, as_supervised, decoders, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
    298 if download:
    299   download_and_prepare_kwargs = download_and_prepare_kwargs or {}
--> 300   dbuilder.download_and_prepare(**download_and_prepare_kwargs)
    302 if as_dataset_kwargs is None:
    303   as_dataset_kwargs = {}

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/api_utils.py:52, in disallow_positional_args.<locals>.disallow_positional_args_dec(fn, instance, args, kwargs)
     50 _check_no_positional(fn, args, ismethod, allowed=allowed)
     51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:285, in DatasetBuilder.download_and_prepare(self, download_dir, download_config)
    281 with file_format_adapter.incomplete_dir(self._data_dir) as tmp_data_dir:
    282   # Temporarily assign _data_dir to tmp_data_dir to avoid having to forward
    283   # it to every sub function.
    284   with utils.temporary_assignment(self, "_data_dir", tmp_data_dir):
--> 285     self._download_and_prepare(
    286         dl_manager=dl_manager,
    287         download_config=download_config)
    289     # NOTE: If modifying the lines below to put additional information in
    290     # DatasetInfo, you'll likely also want to update
    291     # DatasetInfo.read_from_directory to possibly restore these attributes
    292     # when reading from package data.
    293 
    294     # Update the DatasetInfo metadata by computing statistics from the data.
    295     if (download_config.compute_stats == download.ComputeStatsMode.SKIP or
    296         download_config.compute_stats == download.ComputeStatsMode.AUTO and
    297         bool(self.info.splits.total_num_examples)
    298        ):

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:946, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, download_config)
    944 def _download_and_prepare(self, dl_manager, download_config):
    945   # Extract max_examples_per_split and forward it to _prepare_split
--> 946   super(GeneratorBasedBuilder, self)._download_and_prepare(
    947       dl_manager=dl_manager,
    948       max_examples_per_split=download_config.max_examples_per_split,
    949   )

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:804, in FileAdapterBuilder._download_and_prepare(self, dl_manager, **prepare_split_kwargs)
    802 # Generating data for all splits
    803 split_dict = splits_lib.SplitDict()
--> 804 for split_generator in self._split_generators(dl_manager):
    805   if splits_lib.Split.ALL == split_generator.split_info.name:
    806     raise ValueError(
    807         "tfds.Split.ALL is a special split keyword corresponding to the "
    808         "union of all splits, so cannot be used as key in "
    809         "._split_generator()."
    810     )

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/image/cats_vs_dogs.py:81, in CatsVsDogs._split_generators(self, dl_manager)
     80 def _split_generators(self, dl_manager):
---> 81   path = dl_manager.download(_URL)
     83   # There is no predefined train/val/test split for this dataset.
     84   return [
     85       tfds.core.SplitGenerator(
     86           name=tfds.Split.TRAIN,
   (...)
     90           }),
     91   ]

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:301, in DownloadManager.download(self, url_or_urls)
    299 # Add progress bar to follow the download state
    300 with self._downloader.tqdm():
--> 301   return _map_promise(self._download, url_or_urls)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:395, in _map_promise(map_fn, all_inputs)
    393 """Map the function into each element and resolve the promise."""
    394 all_promises = utils.map_nested(map_fn, all_inputs)  # Apply the function
--> 395 res = utils.map_nested(_wait_on_promise, all_promises)
    396 return res

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/utils/py_utils.py:143, in map_nested(function, data_struct, dict_only, map_tuple)
    141       return tuple(mapped)
    142 # Singleton
--> 143 return function(data_struct)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:379, in _wait_on_promise(p)
    378 def _wait_on_promise(p):
--> 379   return p.get()

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:512, in Promise.get(self, timeout)
    510 target = self._target()
    511 self._wait(timeout or DEFAULT_TIMEOUT)
--> 512 return self._target_settled_value(_raise=True)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:516, in Promise._target_settled_value(self, _raise)
    514 def _target_settled_value(self, _raise=False):
    515     # type: (bool) -> Any
--> 516     return self._target()._settled_value(_raise)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:226, in Promise._settled_value(self, _raise)
    224 if _raise:
    225     raise_val = self._fulfillment_handler0
--> 226     reraise(type(raise_val), raise_val, self._traceback)
    227 return self._fulfillment_handler0

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/six.py:719, in reraise(tp, value, tb)
    717     if value.__traceback__ is not tb:
    718         raise value.with_traceback(tb)
--> 719     raise value
    720 finally:
    721     value = None

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/promise/promise.py:87, in try_catch(handler, *args, **kwargs)
     84 def try_catch(handler, *args, **kwargs):
     85     # type: (Callable, Any, Any) -> Union[Tuple[Any, None], Tuple[None, Tuple[Exception, Optional[TracebackType]]]]
     86     try:
---> 87         return (handler(*args, **kwargs), None)
     88     except Exception as e:
     89         tb = exc_info()[2]

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:245, in DownloadManager._download.<locals>.callback(val)
    243 def callback(val):
    244   checksum, dl_size = val
--> 245   return self._handle_download_result(
    246       resource, download_dir_path, checksum, dl_size)

File ~/anaconda3/envs/work_env/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py:207, in DownloadManager._handle_download_result(self, resource, tmp_dir_path, sha256, dl_size)
    205   self._record_sizes_checksums()
    206 elif (dl_size, sha256) != self._sizes_checksums.get(resource.url, None):
--> 207   raise NonMatchingChecksumError(resource.url, tmp_path)
    208 download_path = self._get_final_dl_path(resource.url, sha256)
    209 resource_lib.write_info_file(resource, download_path, self._dataset_name,
    210                              original_fname)

NonMatchingChecksumError: Artifact https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip, downloaded to data/downloads/down.micr.com_down_3_E_1_3E1C-ECDB-4869-83s65y3LdGwg1Nl3MtI_KASK_7DbbKUE5i78aSRiYcW5Y.zip.tmp.d29547b4270b45f29828b6cf78a2474a/kagglecatsanddogs_5340.zip, has wrong checksum.

What can/should I do from here?

user1245262
  • 6,968
  • 8
  • 50
  • 77

0 Answers0