I'm not able to call dataset.map() or dataset.select(range(10)) with the Hugging Face Datasets library in Colab. It fails with: module 'dill._dill' has no attribute 'log'.
I have tried different dill versions, but with no luck.
I also tried older versions of the dill library, but they gave the same error.
Is there a way to fix the issue, or is there any workaround?
Any leads will be helpful.
Stack trace:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-39-dd9b972a8f3f> in <module>
1 test_data = load_dataset("scientific_papers", "arxiv", ignore_verifications=True, split="test")
2 print(test_data)
----> 3 data = test_data.select(range(10))
16 frames
/usr/local/lib/python3.9/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
155 }
156 # apply actual function
--> 157 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
158 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
159 # re-apply format to the output
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in wrapper(*args, **kwargs)
155 if kwargs.get(fingerprint_name) is None:
156 kwargs_for_fingerprint["fingerprint_name"] = fingerprint_name
--> 157 kwargs[fingerprint_name] = update_fingerprint(
158 self._fingerprint, transform, kwargs_for_fingerprint
159 )
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in update_fingerprint(fingerprint, transform, transform_args)
103 for key in sorted(transform_args):
104 hasher.update(key)
--> 105 hasher.update(transform_args[key])
106 return hasher.hexdigest()
107
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in update(self, value)
55 def update(self, value):
56 self.m.update(f"=={type(value)}==".encode("utf8"))
---> 57 self.m.update(self.hash(value).encode("utf-8"))
58
59 def hexdigest(self):
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in hash(cls, value)
51 return cls.dispatch[type(value)](cls, value)
52 else:
---> 53 return cls.hash_default(value)
54
55 def update(self, value):
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in hash_default(cls, value)
44 @classmethod
45 def hash_default(cls, value):
---> 46 return cls.hash_bytes(dumps(value))
47
48 @classmethod
/usr/local/lib/python3.9/dist-packages/datasets/utils/py_utils.py in dumps(obj)
387 file = StringIO()
388 with _no_cache_fields(obj):
--> 389 dump(obj, file)
390 return file.getvalue()
391
/usr/local/lib/python3.9/dist-packages/datasets/utils/py_utils.py in dump(obj, file)
359 def dump(obj, file):
360 """pickle an object to a file"""
--> 361 Pickler(file, recurse=True).dump(obj)
362 return
363
/usr/local/lib/python3.9/dist-packages/dill/_dill.py in dump(self, obj)
392 f = filename
393 else:
--> 394 f = open(filename, 'wb')
395 try:
396 if byref:
/usr/lib/python3.9/pickle.py in dump(self, obj)
485 if self.proto >= 4:
486 self.framer.start_framing()
--> 487 self.save(obj)
488 self.write(STOP)
489 self.framer.end_framing()
/usr/local/lib/python3.9/dist-packages/dill/_dill.py in save(self, obj, save_persistent_id)
386 def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds):
387 """pickle the current state of __main__ to a file"""
--> 388 from .settings import settings
389 protocol = settings['protocol']
390 if main is None: main = _main_module
/usr/lib/python3.9/pickle.py in save(self, obj, save_persistent_id)
558 f = self.dispatch.get(t)
559 if f is not None:
--> 560 f(self, obj) # Call unbound method with explicit self
561 return
562
/usr/local/lib/python3.9/dist-packages/dill/_dill.py in save_singleton(pickler, obj)
1524 def pickles(obj,exact=False,safe=False,**kwds):
1525 """
-> 1526 Quick check if object pickles with dill.
1527
1528 If *exact=True* then an equality test is done to check if the reconstructed
/usr/lib/python3.9/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, state_setter, obj)
689 write(NEWOBJ)
690 else:
--> 691 save(func)
692 save(args)
693 write(REDUCE)
/usr/local/lib/python3.9/dist-packages/dill/_dill.py in save(self, obj, save_persistent_id)
386 def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds):
387 """pickle the current state of __main__ to a file"""
--> 388 from .settings import settings
389 protocol = settings['protocol']
390 if main is None: main = _main_module
/usr/lib/python3.9/pickle.py in save(self, obj, save_persistent_id)
558 f = self.dispatch.get(t)
559 if f is not None:
--> 560 f(self, obj) # Call unbound method with explicit self
561 return
562
/usr/local/lib/python3.9/dist-packages/datasets/utils/py_utils.py in save_function(pickler, obj)
583 dill._dill.log.info("# F1")
584 else:
--> 585 dill._dill.log.info("F2: %s" % obj)
586 name = getattr(obj, "__qualname__", getattr(obj, "__name__", None))
587 dill._dill.StockPickler.save_global(pickler, obj, name=name)
AttributeError: module 'dill._dill' has no attribute 'log'