0

I am trying to run the code from a Transformer paper. I am loading the data into a data loader, and I want to loop over the batches and view the data. I am new to data loaders and I know that this is a simple process, but I cannot seem to figure it out.

Please find the code I used for the data loader below.

#using only the SMAP class from the 'data_loader.py' file
class SMAPSegLoader(object):
    """Map-style dataset serving fixed-length windows of the SMAP arrays.

    The constructor loads ``SMAP_train.npy``, ``SMAP_test.npy`` and
    ``SMAP_test_label.npy`` from ``data_path``. A ``StandardScaler`` is
    fitted on the training array only and applied to both arrays.

    Args:
        data_path: Directory holding the three ``.npy`` files, e.g.
            ``'/content/gdrive/MyDrive/Colab Notebooks/'``.
        win_size: Number of consecutive rows in every returned window.
        step: Distance in rows between the starts of two consecutive
            windows for the ``"train"``, ``"val"`` and ``"test"`` modes.
        mode: ``"train"``, ``"val"`` or ``"test"``. Any other value selects
            non-overlapping windows (stride ``win_size``) over the test
            array.

    Each item is a ``(window, labels)`` pair of ``float32`` arrays. In the
    ``"train"`` and ``"val"`` modes the labels are always the first
    ``win_size`` test labels, i.e. a placeholder that is not aligned with
    the window.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size

        # Fit the scaler on the training array only, then reuse the fitted
        # statistics to transform the test array.
        self.scaler = StandardScaler()
        data = np.load(data_path + "/SMAP_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/SMAP_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        # No separate validation file is loaded: validation reuses the
        # scaled test array.
        self.val = self.test
        self.test_labels = np.load(data_path + "/SMAP_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def _split_and_stride(self):
        """Return the ``(array, stride)`` pair used by the current mode."""
        if self.mode == "train":
            return self.train, self.step
        if self.mode == "val":
            return self.val, self.step
        if self.mode == "test":
            return self.test, self.step
        # Fallback mode: windows over the test array that do not overlap.
        return self.test, self.win_size

    def __len__(self):
        """Return the number of complete windows available in this mode."""
        data, stride = self._split_and_stride()
        # Clamp at zero: an array shorter than one window would otherwise
        # give a negative value, which makes len() raise ValueError.
        return max(0, (data.shape[0] - self.win_size) // stride + 1)

    def __getitem__(self, index):
        """Return the ``(window, labels)`` pair at position ``index``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Raising here is what lets ``for item in dataset`` and
                ``list(dataset)`` terminate.
        """
        count = len(self)
        if index < 0:
            index += count
        if not 0 <= index < count:
            raise IndexError("window index out of range")

        data, stride = self._split_and_stride()
        start = index * stride
        stop = start + self.win_size
        window = np.float32(data[start:stop])

        if self.mode in ("train", "val"):
            # Placeholder labels: always the first win_size test labels.
            labels = self.test_labels[0:self.win_size]
        else:
            labels = self.test_labels[start:stop]
        return window, np.float32(labels)
                              
#modified function from 'data_loader.py' because dealing with only SMAP data
def get_loader_segment(data_path, batch_size, win_size=100, step=100, mode='train', dataset='SMAP'):
    """Build a ``DataLoader`` over windows of the SMAP data.

    Args:
        data_path: Directory containing the SMAP ``.npy`` files.
        batch_size: Number of windows per batch.
        win_size: Number of rows per window.
        step: Accepted for interface compatibility but currently unused;
            the window stride passed to ``SMAPSegLoader`` is fixed at 1.
        mode: Forwarded to ``SMAPSegLoader``. Batches are shuffled only
            when this is ``'train'``.
        dataset: Dataset name. Only ``'SMAP'`` is supported.

    Returns:
        A ``DataLoader`` (``num_workers=0``) over the windowed dataset.

    Raises:
        ValueError: If ``dataset`` is not ``'SMAP'``. Without this check
            the name string itself would be handed to ``DataLoader`` as
            the dataset.
    """
    if dataset != 'SMAP':
        raise ValueError(
            "unsupported dataset %r: only 'SMAP' is available" % (dataset,)
        )

    # Stride is deliberately kept at 1 (not `step`) to preserve the
    # behavior existing callers rely on.
    windows = SMAPSegLoader(data_path, win_size, 1, mode)
    print(batch_size)
    return DataLoader(
        dataset=windows,
        batch_size=batch_size,
        shuffle=(mode == 'train'),
        num_workers=0,
    )

Finally, I create the data loader using:

# Dataset key and the directory that is passed on as data_path.
dataset_name = 'SMAP'
path = '/content/gdrive/MyDrive/Colab Notebooks/'

# Training loader: batch size 8, window length 100.
train_loader = get_loader_segment(
    data_path=path,
    batch_size=8,
    win_size=100,
    mode='train',
    dataset=dataset_name,
)

I want to run a loop of this sort

# Walk the loader batch by batch, printing each batch's length and then the
# batch itself on the following line.
for batch in train_loader:
    print(len(batch), batch, sep="\n")

But I keep getting an error

Sar99
  • 43
  • 5
  • Found (sort of) a solution here - https://stackoverflow.com/questions/66272911/how-to-see-the-data-in-dataloader-in-pytorch – Sar99 Aug 01 '23 at 20:05

0 Answers0