Why does the runtime keep crashing on Google Colab?
I have a simple MLP script that runs fine on my local machine. When I run the same code on Colab, it crashes shortly after loading the data files.
The data files are around 3 GB in total, and both the CPU RAM and the GPU memory of the Colab VM are comfortably above that.
So why does my program crash before it can even start training?
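For reference, this is roughly how I would check what the arrays actually occupy once loaded, since the on-disk size of a .npy file is not necessarily its in-memory size (a minimal sketch; report_memory is just a helper name, and it assumes the loaded objects are plain NumPy arrays and that psutil is available, which it is on Colab by default):

import numpy as np
import psutil

def report_memory(tag, *arrays):
    # Print each array's in-memory footprint, then the VM's remaining RAM.
    for a in arrays:
        print(tag, a.shape, a.dtype, "%.2f GB" % (a.nbytes / 1e9))
    print(tag, "available RAM: %.2f GB" % (psutil.virtual_memory().available / 1e9))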
My Code:
import numpy as np
from sklearn import preprocessing

def load_raw(name):
    # encoding='bytes' is needed for .npy files pickled under Python 2
    return (np.load(name + '.npy', encoding='bytes'),
            np.load(name + '_labels.npy', encoding='bytes'))

class WSJ():
    def __init__(self):
        self.dev_set = None
        self.train_set = None
        self.test_set = None

    @property
    def dev(self):
        if self.dev_set is None:
            self.dev_set = load_raw('dev')
        return self.dev_set

    @property
    def train(self):
        if self.train_set is None:
            self.train_set = load_raw('train')
        return self.train_set

    @property
    def test(self):
        if self.test_set is None:
            self.test_set = (np.load('test.npy', encoding='bytes'), None)
        return self.test_set

    def preprocess_data(self, trainX, trainY, k):
        # some form of preprocessing that pads and flattens the data into the format required
        return trainX_padded, trainY, y_to_x_map
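# Hypothetical sketch (NOT my actual preprocess_data, which is omitted above),
# just to show what I mean by padding and flattening, and where
# input_dim = 40 * (2*padding + 1) below comes from: each frame gets
# concatenated with k context frames on either side.
def preprocess_data_sketch(utterances, labels, k):
    frames, frame_labels = [], []
    for x, y in zip(utterances, labels):
        padded = np.pad(x, ((k, k), (0, 0)), mode='constant')  # zero-pad the context
        for i in range(x.shape[0]):
            frames.append(padded[i:i + 2 * k + 1].ravel())  # one (2k+1)*40 vector per frame
            frame_labels.append(y[i])
    return np.asarray(frames), np.asarray(frame_labels)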
def main():
    global index
    padding = 3
    epochs = 1
    batch_size = 512
    lr = 0.1
    momentum = 0.9
    input_dim = 40 * ((2 * padding) + 1)  # 40 features per frame, plus context
    output_dim = 138
    neural_net = MLP(input_dim, output_dim)  # MLP is defined elsewhere, omitted here
    !free -g  # Colab shell magic: report free memory in GB
    print("Starting...")
    loader = WSJ()
    trainX, trainY = loader.train
    print("Training Data obtained...")
    !free -g
    trainX, trainY, y_to_x_map = loader.preprocess_data(trainX, trainY, k=padding)
    print("Training Data preprocessed...")
    !free -g
    devX, devY = loader.dev
    devX, devY, y_to_x_map_dev = loader.preprocess_data(devX, devY, k=padding)
    print("Development data preprocessed...")
    !free -g
    print("Scaling...")
    input_scaler = preprocessing.StandardScaler().fit(trainX)
    !free -g
    trainX = input_scaler.transform(trainX)
    devX = input_scaler.transform(devX)
It crashes immediately after printing "Scaling...".
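In case the full-size copy that StandardScaler.transform allocates is part of the problem, here is a lower-peak-memory variant I have been considering: fit the scaler incrementally with partial_fit and write the result back chunk by chunk (a minimal sketch under that assumption, not the code that crashes; scale_in_chunks is a hypothetical helper and it assumes trainX is a plain 2-D float ndarray):

from sklearn import preprocessing

def scale_in_chunks(X, chunk=100000):
    # Fit incrementally so a second full-size copy of X is never allocated.
    scaler = preprocessing.StandardScaler()
    for start in range(0, X.shape[0], chunk):
        scaler.partial_fit(X[start:start + chunk])
    # Transform chunk by chunk and write back in place: the peak extra
    # allocation is one chunk rather than the whole array.
    for start in range(0, X.shape[0], chunk):
        X[start:start + chunk] = scaler.transform(X[start:start + chunk])
    return scaler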