I'm relatively new to using CUDA. I keep getting the following error after a seemingly random amount of time: RuntimeError: CUDA error: an illegal memory access was encountered
I have seen people suggest things such as using torch.cuda.set_device() rather than torch.cuda.device() and setting torch.backends.cudnn.benchmark = False, but I can't seem to get the error to go away. Here are some pieces of my code:
torch.cuda.set_device(torch.device('cuda:0'))
torch.backends.cudnn.benchmark = False
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_().cuda()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_().cuda()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])
        return out

    def pred(self, x):
        return self(x) > 0
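(For context on the forward pass the traceback points at: h0 and c0 are allocated on the CPU and then moved with .cuda(). An alternative I have seen is to create them directly on the input's device; just a sketch of those two lines, and I don't know whether it is related to the crash.)

import torch

# Sketch: allocate the initial hidden/cell states directly on the same
# device and dtype as the input batch instead of CPU + .cuda().
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device, dtype=x.dtype)
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device, dtype=x.dtype)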
def train(model, loss_fn, optimizer, num_epochs, x_train, y_train, x_val, y_val, loss_stop=60):
    cur_best_loss = 999
    loss_recur_count = 0
    best_model = None

    for t in range(num_epochs):
        model.train()
        y_train_pred = model(x_train)
        train_loss = loss_fn(y_train_pred, y_train)
        tr_l = train_loss.item()

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            y_val_pred = model(x_val)
            val_loss = loss_fn(y_val_pred, y_val)
            va_l = val_loss.item()

        if va_l < cur_best_loss:
            cur_best_loss = va_l
            best_model = model
            loss_recur_count = 0
        else:
            loss_recur_count += 1

        if loss_recur_count == loss_stop:
            break

    if best_model is None:
        print("model is None.")

    return best_model
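(Side note on train(): I realize best_model = model only stores a reference to the model that keeps training, not a snapshot of the best weights. A copy would look roughly like the sketch below; I don't think it is the cause of the illegal memory access, but mentioning it in case it matters.)

import copy

# Sketch: keep an actual snapshot of the best-performing weights
# instead of a reference to the live model object.
if va_l < cur_best_loss:
    cur_best_loss = va_l
    best_model = copy.deepcopy(model)
    loss_recur_count = 0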
def lstm_test(cols, df, test_percent, test_bal, initial_shares_test, max_price, last_sell_day):
    wdw = 20
    x_train, y_train, x_test, y_test, x_val, y_val = load_data(df, wdw, test_percent, cols)

    x_train = torch.from_numpy(x_train).type(torch.Tensor).cuda()
    x_test = torch.from_numpy(x_test).type(torch.Tensor).cuda()
    x_val = torch.from_numpy(x_val).type(torch.Tensor).cuda()
    y_train = torch.from_numpy(y_train).type(torch.Tensor).cuda()
    y_test = torch.from_numpy(y_test).type(torch.Tensor).cuda()
    y_val = torch.from_numpy(y_val).type(torch.Tensor).cuda()

    input_dim = x_train.shape[-1]
    hidden_dim = 32
    num_layers = 2
    output_dim = 1

    y_preds_dict = {}
    for i in range(11):
        model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers).cuda()
        r = (y_train.cpu().shape[0] - np.count_nonzero(y_train.cpu())) / np.count_nonzero(y_train.cpu()) / 2
        pos_w = torch.tensor([r]).cuda()
        loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=pos_w).cuda()
        optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)

        best_model = train(model, loss_fn, optimizer, 300, x_train, y_train, x_val, y_val)
        y_test_pred = get_predictions(best_model, x_test)
        y_preds_dict[i] = y_test_pred.cpu().detach().numpy().flatten()
And here is the error message:
<ipython-input-5-c52edc2c0508> in train(model, loss_fn, optimizer, num_epochs, x_train, y_train, x_val, y_val, loss_stop)
19 model.eval()
20 with torch.no_grad():
---> 21 y_val_pred = model(x_val)
22
23 val_loss = loss_fn(y_val_pred, y_val)
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-4-9da8c811c037> in forward(self, x)
10
11 def forward(self, x):
---> 12 h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_().cuda()
13 c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_().cuda()
14
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
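In case it helps to reproduce this: as I understand the last line, CUDA kernel launches are asynchronous by default, so the trace above may point at the wrong call. A minimal sketch of forcing synchronous launches to get an accurate stack trace (the environment variable has to be set before the first CUDA call, e.g. at the very top of the notebook or script):

import os

# Force synchronous CUDA kernel launches so the Python stack trace
# points at the call that actually triggered the error.
# Must be set before CUDA is initialized, i.e. before importing torch
# or any code that touches the GPU.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import torch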