I tried the following code
import pandas as pd
import os
from torch.utils.data import Dataset
from torch.utils.data import Dataset, DataLoader
class ArticleDataset(Dataset):
def __init__(self):
# super().__init__()
df= pd.read_csv('https://drive.google.com/uc?export?format=csv&=download&id=*********', sep = "delimiter", index_col=0, header=None, engine='python')
self.articles_list=df
def __len__(self):
return len(self.articles_list)
def __getitem__(self, item):
return self.articles_list.index[item]
The data frame has data and prints data when df.head() is executed. But it gives the error
TypeError Traceback (most recent call last)
<ipython-input-74-08e914e52560> in <module>()
1 dataset =ArticleDataset()
----> 2 article_loader=DataLoader(dataset, batch_size=1, shuffle=True)
2 frames
/usr/local/lib/python3.6/dist-packages/torch/utils/data/sampler.py in num_samples(self)
98 # dataset size might change at runtime
99 if self._num_samples is None:
--> 100 return len(self.data_source)
101 return self._num_samples
102
TypeError: object of type 'ArticleDataset' has no len()
when
dataset =ArticleDataset()
article_loader=DataLoader(dataset, batch_size=1, shuffle=True)
is executed.
But according to the correct answer found in here, len(DataFrame.index) gives me the expected no. of rows in the Dataframe as well. And df.index1 prints the first row of the Dataframe, but cannot figure out what is wrong.
I tried turning the data frame to list as well, but the same error was given. May be there is some error in the code that I'm not seeing and I'm new to python. Any sort of help is appreciated.
Edit (After Unindenting once):
import pandas as pd
import os
from torch.utils.data import Dataset
from torch.utils.data import Dataset, DataLoader
class ArticleDataset(Dataset):
def __init__(self):
# super().__init__()
df= pd.read_csv('https://drive.google.com/uc?export?format=csv&=download&id=*******', sep = "delimiter", index_col=0, header=None, engine='python')
self.articles_list=df
def __len__(self):
return len(self.articles_list)
def __getitem__(self, item):
return self.articles_list.index[item]
this will give the following error
File "<ipython-input-104-165bbc37826a>", line 13
def __len__(self):
^
IndentationError: unindent does not match any outer indentation level