I would like to process some textual data with sentence-transformers (i.e. generate embeddings for text columns) on multiple GPUs (2 T4s, 15 GB each) and 16 vCPUs (60 GB RAM) on GCP, from a Jupyter notebook.
The data size is not large, but the Dask workers keep getting restarted because of memory leakage, even though the malloc trim threshold (MALLOC_TRIM_THRESHOLD_) was set from the shell.
My code:
# run export MALLOC_TRIM_THRESHOLD_=65536 from shell before starting dask cluster
!pip install sentence-transformers
import os
import glob
import numpy as np
import gc
import cudf
import dask_cudf
import cupy
import rmm
from dask.distributed import Client, wait, get_worker, get_client
from dask_cuda import LocalCUDACluster
cluster = LocalCUDACluster(CUDA_VISIBLE_DEVICES="0,1", n_workers=2, threads_per_worker=4, memory_limit="15GB",\
device_memory_limit="24GB", rmm_pool_size="4GB", rmm_maximum_pool_size="15GB")
client = Client(cluster)
print(client.run(os.getenv, "MALLOC_TRIM_THRESHOLD_")) # 65536
initial_pool_size = 4*10**9
maximum_pool_size = 15*10**9
rmm.reinitialize(pool_allocator=True, managed_memory=True, initial_pool_size=initial_pool_size,
maximum_pool_size=maximum_pool_size, devices=[0,1], logging=True, log_file_name='./tmp/logs/test_sbert_distributed.log')
import dask.dataframe as dd
import pandas as pd
from dask.multiprocessing import get
import random
df = pd.DataFrame({'col_1': ["This is sentence " + str(x) for x in random.sample(range(10**7), 10**7)],
'col_2': ["That is another sentence " + str(x) for x in random.sample(range(10**7), 10**7)]})
cudf_df = cudf.DataFrame.from_pandas(df)
dask_df = dask_cudf.from_cudf(cudf_df, npartitions=8)
from sentence_transformers import SentenceTransformer
import numpy as np
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
def test_f_str(df, args):
    col1, col2, chunks = args
    for col in [col1, col2]:
        emb = sbert_model.encode(sentences=df[col].to_arrow().to_pylist(), batch_size=1250, show_progress_bar=True)
        semb = np.array([str(x) for x in emb])
        df[col+'_emb'] = semb
    return df
chunks = dask_df.map_partitions(lambda x: len(x)).compute().to_numpy()
print(chunks, type(chunks))
[1250000 1250000 1250000 1250000 1250000 1250000 1250000 1250000] <class 'numpy.ndarray'>
dask_df.npartitions, dask_df.persist()
(8, <dask_cudf.DataFrame | 8 tasks | 8 npartitions>)
new_dask_df = dask_df.map_partitions(test_f_str,
                                     args=('col_1', 'col_2', chunks),
                                     meta={'col_1': 'object',
                                           'col_2': 'object',
                                           'col_1_emb': 'object',
                                           'col_2_emb': 'object'})
new_dask_df.dtypes
col_1 object
col_2 object
col_1_emb object
col_2_emb object
dtype: object
new_dask_df.compute() # error: WARNING - Unmanaged memory use is high. This may indicate a memory leak or the memory
# may not be released to the OS; see https://distributed.dask.org/en/latest/worker-memory.html#memory-not-released-back-to-the-os for more information.
# -- Unmanaged memory: 9.96 GiB -- Worker memory limit: 13.97 GiB
I have tried many of the suggested solutions, but none of them resolved the issue (one of them, the manual memory trimming recipe, is sketched after the links below):
https://www.coiled.io/blog/tackling-unmanaged-memory-with-dask
https://stackoverflow.com/questions/71203077/why-does-dask-distributed-auto-memory-trimming-not-work
https://github.com/dask/distributed/issues/5971
https://stackoverflow.com/questions/72180961/dask-memory-leak-workaround
https://stackoverflow.com/questions/58275476/dask-distributed-workers-always-leak-memory-when-running-many-tasks
https://distributed.dask.org/en/stable/worker-memory.html
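For reference, the manual trimming I tried follows the "manually trim memory" recipe from the distributed worker-memory docs linked above; it only touches host-side glibc memory, so it is a workaround sketch rather than a fix:

import ctypes

def trim_memory() -> int:
    # Ask glibc to return freed (host) memory back to the OS on this worker.
    libc = ctypes.CDLL("libc.so.6")
    return libc.malloc_trim(0)

# run on every worker of the cluster
client.run(trim_memory)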
Could anybody point out what I missed here?
============== UPDATE ===================
I am using the dashboards from https://developer.nvidia.com/blog/gpu-dashboards-in-jupyter-lab/, but the “Workers memory” (bytes stored per worker) plots did not show any “unmanaged” or “leaked” memory.
However, the “GPU memory” plots turned orange and showed spilled memory, as described in https://distributed.dask.org/en/stable/worker-memory.html#using-the-dashboard-to-monitor-memory-usage
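To cross-check what the dashboards show, I can also sample host and device memory per worker directly; this is only an illustration of the check (the psutil / pynvml calls are my own addition, not part of the setup above):

import os
import psutil
import pynvml

def memory_snapshot():
    # Host-side resident memory of this worker process (bytes).
    host_rss = psutil.Process().memory_info().rss
    # Device-side memory used on the GPU assigned to this worker (bytes).
    # The first entry of CUDA_VISIBLE_DEVICES is the GPU dask-cuda gave this worker.
    pynvml.nvmlInit()
    dev = int(os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0])
    handle = pynvml.nvmlDeviceGetHandleByIndex(dev)
    gpu_used = pynvml.nvmlDeviceGetMemoryInfo(handle).used
    return {"host_rss": host_rss, "gpu_used": gpu_used}

# compare snapshots taken before and after new_dask_df.compute()
print(client.run(memory_snapshot))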
How can I confirm whether the leakage is in CPU (host) memory or GPU (device) memory?