I am trying to run the code from this Transformer paper. I am loading the data into a data loader, and I want to loop over the batches and view the data. I am new to DataLoader, and I know that this is a simple process, but I cannot seem to figure it out.
Please find the code I used for the data loader below.
#using only the SMAP class from the 'data_loader.py' file
class SMAPSegLoader(object):
    """Serve fixed-length windows of the SMAP arrays for a given mode.

    The constructor loads ``SMAP_train.npy``, ``SMAP_test.npy`` and
    ``SMAP_test_label.npy`` from ``data_path``, fits a ``StandardScaler`` on
    the training array and applies it to both the training and test arrays.
    The validation split is an alias of the scaled test split.

    Indexing returns a ``(window, labels)`` pair of ``float32`` arrays:

    * ``"train"`` / ``"val"``: a window of ``win_size`` rows starting at
      ``index * step``; the labels are always the first ``win_size`` test
      labels regardless of ``index`` (presumably placeholders, since no
      train/val label file is loaded -- confirm against the training code).
    * ``"test"``: a test window starting at ``index * step`` with the
      matching slice of test labels.
    * any other mode: non-overlapping test windows starting at
      ``index * win_size`` with the matching slice of test labels.

    Args:
        data_path: Directory containing the three ``.npy`` files.
        win_size: Number of consecutive rows in each window.
        step: Stride between window starts for the train/val/test modes.
        mode: Which split to serve; see above.
    """

    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size

        # Fit the scaler on the training data only, then reuse it for the
        # test data so both splits share the same normalisation.
        self.scaler = StandardScaler()
        data = np.load(data_path + "/SMAP_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)

        test_data = np.load(data_path + "/SMAP_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        # No separate validation file is loaded; validation reuses the test split.
        self.val = self.test
        self.test_labels = np.load(data_path + "/SMAP_test_label.npy")

        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def _split(self):
        """Return the array that the current mode reads windows from."""
        if self.mode == "train":
            return self.train
        if self.mode == "val":
            return self.val
        return self.test

    def __len__(self):
        """Return the number of complete windows available (never negative)."""
        if self.mode in ("train", "val", "test"):
            stride = self.step
        else:
            # Any other mode walks the test split in non-overlapping windows.
            stride = self.win_size
        count = (self._split().shape[0] - self.win_size) // stride + 1
        # A split shorter than one window would otherwise give a negative
        # count, which makes len() raise ValueError.
        return max(count, 0)

    def __getitem__(self, index):
        """Return the ``(window, labels)`` pair for window number ``index``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``index`` does not address a complete window.
                Raising here is what lets ``for sample in dataset`` and
                ``list(dataset)`` terminate.
        """
        length = len(self)
        if index < 0:
            index += length
        if not 0 <= index < length:
            raise IndexError("window index out of range")

        win = self.win_size
        if self.mode == "train" or self.mode == "val":
            start = index * self.step
            window = self._split()[start:start + win]
            labels = self.test_labels[0:win]
        elif self.mode == "test":
            start = index * self.step
            window = self.test[start:start + win]
            labels = self.test_labels[start:start + win]
        else:
            start = index * win
            window = self.test[start:start + win]
            labels = self.test_labels[start:start + win]
        return np.float32(window), np.float32(labels)
#modified function from 'data_loader.py' because dealing with only SMAP data
def get_loader_segment(data_path, batch_size, win_size=100, step=100, mode='train', dataset='SMAP'):
    """Build a ``DataLoader`` over ``SMAPSegLoader`` windows.

    Args:
        data_path: Directory handed to ``SMAPSegLoader``.
        batch_size: Number of windows per batch.
        win_size: Number of rows in each window.
        step: Accepted for interface compatibility but not forwarded; the
            dataset is always built with a stride of 1.
            NOTE(review): confirm whether ``step`` should be honoured.
        mode: Split passed to ``SMAPSegLoader``; batches are shuffled only
            when this is ``'train'``.
        dataset: Dataset name; only ``'SMAP'`` is supported.

    Returns:
        A ``DataLoader`` over the windowed dataset.

    Raises:
        ValueError: If ``dataset`` is not ``'SMAP'``. Without this check the
            name string itself would be passed to ``DataLoader`` as the
            dataset.
    """
    if dataset != 'SMAP':
        raise ValueError("unsupported dataset: %r (only 'SMAP' is available)" % (dataset,))

    windows = SMAPSegLoader(data_path, win_size, 1, mode)
    shuffle = mode == 'train'
    return DataLoader(dataset=windows, batch_size=batch_size, shuffle=shuffle, num_workers=0)
Finally, I call the data loader using:
# Directory that holds SMAP_train.npy, SMAP_test.npy and SMAP_test_label.npy.
path = '/content/gdrive/MyDrive/Colab Notebooks/'
dataset_name = 'SMAP'

# Training-mode loader: windows of 100 rows, 8 windows per batch.
train_loader = get_loader_segment(
    data_path=path,
    batch_size=8,
    win_size=100,
    mode='train',
    dataset=dataset_name,
)
I want to run a loop of this sort
# Walk the loader once, showing how many elements each batch holds
# and then the batch itself.
for batch in train_loader:
    print(len(batch))
    print(batch)
But I keep getting an error