I have two GTX 1080 GPUs (8 GB each) on one machine, with Keras v2 installed. I run two training processes simultaneously, one on gpu0 and one on gpu1, but I get a ResourceExhaustedError when the second process starts.
What am I missing?
python multi-gpu-process.py --gpu_id=1   (ok)
python multi-gpu-process.py --gpu_id=0   (ResourceExhaustedError)
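Both are launched at roughly the same time. For completeness, one way to do that from a single shell (equivalent to using two terminals):

python multi-gpu-process.py --gpu_id=1 &
python multi-gpu-process.py --gpu_id=0 &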
# file: multi-gpu-process.py (two training processes, each on its own GPU)
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.client import device_lib

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('gpu_id', 0, """GPU id. Single-GPU case.""")

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

# Which GPUs are installed?
gpu_list = get_available_gpus()

# Is the specified GPU installed on this machine?
if '/gpu:' + str(FLAGS.gpu_id) not in gpu_list:
    raise Exception('This gpu is not installed: /gpu:{}'.format(FLAGS.gpu_id))

# Restrict this process to the specified GPU.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'  # see issue #152
os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)

import keras
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

# Use one GPU.
import keras.backend.tensorflow_backend as K  # If imported and not used, error on session release.

# Works around this error:
# https://stackoverflow.com/questions/42969779/keras-error-you-must-feed-a-value-for-placeholder-tensor-bidirectional-1-keras
K.set_learning_phase(1)  # set learning phase

# Train on the specified GPU.
with K.tf.device('/gpu:%d' % FLAGS.gpu_id):
    K.set_session(K.tf.Session(config=K.tf.ConfigProto(
        allow_soft_placement=True,   # fall back to another device if the specified one is unavailable
        log_device_placement=True)))

    # Small model, to prove that two processes can train on the GPUs at once.
    model = Sequential()
    model.add(Dense(400, input_dim=800, activation='tanh'))
    model.add(Dense(200, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    print(model.summary())

    optimizer = keras.optimizers.Adam(lr=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    x = np.random.rand(131072, 800)
    y = np.random.randint(0, 2, (131072, 1))
    model.fit(x, y, batch_size=2048 * 4, epochs=1000000)

K.clear_session()  # Without it, a session error occurs at the end.
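Side note: as I understand it, by default a TensorFlow session reserves almost all memory of every GPU it can see, which is the reason for pinning each process to one GPU via CUDA_VISIBLE_DEVICES above. In case it is relevant, the session could also be created with on-demand memory growth instead (a sketch using the standard TF 1.x gpu_options; I have not verified that it changes the outcome here):

# Variant of the session setup above, with on-demand GPU memory growth
# instead of TensorFlow's default reserve-almost-everything behaviour.
config = K.tf.ConfigProto(allow_soft_placement=True,
                          log_device_placement=True)
config.gpu_options.allow_growth = True  # allocate GPU memory only as needed
# ...or cap the fraction of GPU memory this process may use:
# config.gpu_options.per_process_gpu_memory_fraction = 0.4
K.set_session(K.tf.Session(config=config))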
================== Does not work when starting the 2nd process on the 2nd GPU (GTX 1080, 8 GB) ==================
# Increased model size compared with the working version.
#
# file: multi-gpu-process_notwork.py
# Identical to multi-gpu-process.py except for the layer sizes and the
# input dimension (marked "changed" below).
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.client import device_lib

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('gpu_id', 0, """GPU id. Single-GPU case.""")

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

# Which GPUs are installed?
gpu_list = get_available_gpus()

# Is the specified GPU installed on this machine?
if '/gpu:' + str(FLAGS.gpu_id) not in gpu_list:
    raise Exception('This gpu is not installed: /gpu:{}'.format(FLAGS.gpu_id))

# Restrict this process to the specified GPU.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'  # see issue #152
os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)

import keras
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

# Use one GPU.
import keras.backend.tensorflow_backend as K  # If imported and not used, error on session release.

# Works around this error:
# https://stackoverflow.com/questions/42969779/keras-error-you-must-feed-a-value-for-placeholder-tensor-bidirectional-1-keras
K.set_learning_phase(1)  # set learning phase

# Train on the specified GPU.
with K.tf.device('/gpu:%d' % FLAGS.gpu_id):
    K.set_session(K.tf.Session(config=K.tf.ConfigProto(
        allow_soft_placement=True,   # fall back to another device if the specified one is unavailable
        log_device_placement=True)))

    # Larger model.
    model = Sequential()
    model.add(Dense(4000, input_dim=8000, activation='tanh'))  # changed: was 400 / input_dim=800
    model.add(Dense(2000, activation='relu'))                  # changed: was 200
    model.add(Dense(50, activation='relu'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    print(model.summary())

    optimizer = keras.optimizers.Adam(lr=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    x = np.random.rand(131072, 8000)  # changed: was (131072, 800)
    y = np.random.randint(0, 2, (131072, 1))
    model.fit(x, y, batch_size=2048 * 4, epochs=1000000)

K.clear_session()  # Without it, a session error occurs at the end.
Partial error:
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[8192,400]
[[Node: gradients/dense_1/Tanh_grad/TanhGrad = TanhGrad[T=DT_FLOAT, _class=["loc:@dense_1/Tanh"], _device="/job:localhost/replica:0/task:0/gpu:0"](dense_1/Tanh, gradients/dense_2/MatMul_grad/MatMul)]]
Since each GPU has 8 GB, I don't see how the model itself can be the problem; it is still fairly small.
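For reference, my back-of-the-envelope estimate of the larger model's footprint (a rough sketch: float32 parameters, Adam's two extra slots per parameter, and one input batch; temporary workspace and activation-gradient memory not counted):

# Rough memory estimate for the model in multi-gpu-process_notwork.py.
layer_dims = [(8000, 4000), (4000, 2000), (2000, 50), (50, 30), (30, 1)]
n_params = sum(n_in * n_out + n_out for n_in, n_out in layer_dims)
weights_mb = n_params * 4 / 1024.0 ** 2        # float32 weights
with_adam_mb = 3 * weights_mb                  # weights + Adam's m and v slots
batch_mb = 8192 * 8000 * 4 / 1024.0 ** 2       # one float32 input batch
print(n_params, weights_mb, with_adam_mb, batch_mb)
# -> ~40.1M parameters, ~153 MB weights, ~459 MB with Adam, ~250 MB per batch

That is well under 8 GB, which is why I expected both processes to fit. (The x array itself is float64 and ~7.8 GB in host RAM, but the OOM is reported on the GPU.)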