I have built a pipeline that reads my data from files into a tf.data.Dataset. The issue is that memory accumulates with every epoch, and after a while the training process is killed. I have tried reducing the number of images shuffled and tweaking the number of parallel calls, but nothing has helped.
On iteration 1 the memory consumption is ~8 GB and after 10 epochs it is ~15 GB.
This is what my pipeline looks like:
Edit: I tried this instead:
def getDataset(data_root_path: Path, is_training: bool) -> tf.data.Dataset:
    """Build one batched dataset from every sub-directory of ``data_root_path``.

    For each sub-directory, ``annotations.json`` and ``classes.json`` are read
    once and handed to ``createDataset`` together with the directory. The
    per-directory datasets are concatenated in directory-listing order,
    shuffled with a 1000-element buffer and grouped into batches of 8.

    Args:
        data_root_path: Directory whose immediate sub-directories are each
            turned into one dataset.
        is_training: Forwarded to ``createDataset`` to select the split.

    Returns:
        The shuffled, batched and prefetched dataset.

    Raises:
        IndexError: If ``data_root_path`` contains no sub-directories.
    """
    sub_dirs = [entry for entry in data_root_path.iterdir() if entry.is_dir()]
    if not sub_dirs:
        # Same exception type the bare ``datasets[0]`` lookup used to raise,
        # but with a message that points at the actual cause.
        raise IndexError(
            "no sub-directories found under {}".format(data_root_path)
        )

    datasets = []
    for sub_dir in sub_dirs:
        annotation_path = sub_dir / "annotations.json"
        annotation = tf.io.read_file(str(annotation_path))
        classes = tf.io.read_file(str(sub_dir / "classes.json"))
        datasets.append(
            createDataset(
                sub_dir,
                annotation,
                classes,
                tf.cast(str(annotation_path), dtype=tf.string),
                is_training=is_training,
            )
        )

    result = datasets[0]
    for extra in datasets[1:]:
        result = result.concatenate(extra)

    # Prefetch goes last so that complete batches (not single elements) are
    # prepared ahead of the consumer.
    return result.shuffle(1000).batch(8).prefetch(tf.data.AUTOTUNE)
That worked for almost 300 epochs, but in the end the memory was still exhausted.
Edit 2:
I tried running the script with tcmalloc preloaded:
LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4 python Python/LineDetection/src/lineextractor.py
It has not crashed yet, but the RAM is almost full. This solution was suggested in: https://github.com/tensorflow/tensorflow/issues/44176#issuecomment-830331981
def getDataset(data_root_path: Path, is_training: bool) -> tf.data.Dataset:
    """Build a batched dataset that samples across all sub-directories.

    For every immediate sub-directory of ``data_root_path`` the files
    ``annotations.json`` and ``classes.json`` are read and passed to
    ``createDataset``. The resulting datasets are combined with
    ``tf.data.Dataset.sample_from_datasets``, shuffled with a 100-element
    buffer and grouped into batches of 8.

    Args:
        data_root_path: Directory whose immediate sub-directories are each
            turned into one dataset.
        is_training: Forwarded to ``createDataset`` to select the split.

    Returns:
        The sampled, shuffled and batched dataset.
    """

    def build_for(source_dir):
        # Read both JSON files for this directory and build its dataset.
        annotation_file = source_dir / "annotations.json"
        annotation_content = tf.io.read_file(str(annotation_file))
        classes_file = source_dir / "classes.json"
        classes_content = tf.io.read_file(str(classes_file))
        return createDataset(
            source_dir,
            annotation_content,
            classes_content,
            tf.cast(str(annotation_file), dtype=tf.string),
            is_training=is_training,
        )

    source_dirs = [entry for entry in data_root_path.iterdir() if entry.is_dir()]
    per_directory = [build_for(source_dir) for source_dir in source_dirs]

    mixed = tf.data.Dataset.sample_from_datasets(per_directory)
    return mixed.shuffle(100).batch(8)
def createDataset(
    dir: Path,
    annotation: tf.string,
    classes: tf.string,
    annotation_path: tf.string,
    is_training: bool,
) -> tf.data.Dataset:
    """Create the per-directory pipeline of resized images and rescaled lines.

    Image files under ``dir / "images"`` with a png/jpg/jpeg extension (all
    lower-case or all upper-case) are listed, filtered with ``is_in_split``,
    then passed through ``create_image_and_annotation``, ``resize_image`` and
    ``rescale_to_image_size``, each with ``num_parallel_calls=1``.

    Args:
        dir: Directory containing an ``images`` sub-directory.
        annotation: Passed through to ``create_image_and_annotation``.
        classes: Passed through to ``create_image_and_annotation``.
        annotation_path: Passed through to ``create_image_and_annotation``.
        is_training: Passed to ``is_in_split`` to choose the split.

    Returns:
        The unbatched dataset produced by the last ``map`` stage.
    """
    images_root = dir / "images"
    extensions = ("png", "PNG", "jpg", "JPG", "jpeg", "JPEG")
    glob_patterns = [str(images_root / ("*." + ext)) for ext in extensions]

    def belongs_to_split(path):
        return is_in_split(path, is_training)

    def load_example(path):
        return create_image_and_annotation(
            path, annotation, classes, annotation_path
        )

    listed = tf.data.Dataset.list_files(glob_patterns)
    selected = listed.filter(belongs_to_split)
    loaded = selected.map(load_example, num_parallel_calls=1)
    resized = loaded.map(resize_image, num_parallel_calls=1)
    return resized.map(rescale_to_image_size, num_parallel_calls=1)
def create_image_and_annotation(
    image_path: tf.string,
    annotation: tf.string,
    classes: tf.string,
    annotation_path: tf.string,
) -> Annotation:
    """Load one image and build its line annotation.

    The file is decoded as JPEG when its suffix is ``jpg``/``JPG``/``jpeg``/
    ``JPEG`` and as PNG otherwise, always with 3 channels. The line tensor is
    produced by running ``create_lines`` through ``tf.py_function``.

    Args:
        image_path: Scalar string tensor with the image file path; the file
            name is taken as the part after the last ``/``.
        annotation: Forwarded to ``create_lines``.
        classes: Forwarded to ``create_lines``.
        annotation_path: Forwarded to ``create_lines``.

    Returns:
        ``Annotation(image, lines)`` with the decoded image and the float32
        output of ``create_lines``.
    """
    bits = tf.io.read_file(image_path)
    image_name = tf.strings.split(image_path, "/")[-1]
    suffix = tf.strings.split(image_name, ".")[-1]

    # One broadcast comparison against all accepted JPEG suffixes.
    jpeg_suffixes = tf.constant(["jpg", "JPG", "jpeg", "JPEG"], dtype=tf.string)
    is_jpeg = tf.math.reduce_any(tf.math.equal(suffix, jpeg_suffixes))

    # NOTE: branching on a tensor relies on AutoGraph, i.e. on this function
    # being traced (as it is when passed to ``Dataset.map``).
    if is_jpeg:
        image_shape = tf.io.extract_jpeg_shape(bits)
        image = tf.io.decode_jpeg(bits, channels=3)
    else:
        image = tf.io.decode_png(bits, channels=3)
        # The decoded tensor already carries its shape; re-encoding it as
        # JPEG just to read the header back is wasted work.
        image_shape = tf.shape(image)

    lines = tf.py_function(
        create_lines,
        inp=[annotation, image_name, classes, image_shape, annotation_path],
        Tout=tf.float32,
    )
    return Annotation(image, lines)
def create_lines(
    annotation: tf.string,
    image_name: tf.string,
    classes: tf.string,
    image_shape: tf.Tensor,
    annotation_path: tf.string,
) -> tf.Tensor:
    """Build the ``(parameters.nbr_of_lines, 5)`` line tensor for one image.

    Each row holds four normalized coordinates followed by a presence flag
    (1 when the row was filled, 0 otherwise). Instances whose label is
    ``"head_side"`` or ``"leg_side"`` are collected and written to
    consecutive rows starting at ``row_order["head_side"]`` /
    ``row_order["leg_side"]``; every other label is written to
    ``row_order[label]``.

    Args:
        annotation: Eager string tensor holding the annotations JSON text.
        image_name: Eager string tensor with the image file name, used as
            the key into the annotations JSON.
        classes: Eager string tensor holding the classes JSON text; each
            entry's ``"id"`` is mapped to its position in the list.
        image_shape: Eager tensor passed to ``rescaled_size``.
        annotation_path: Only printed when an unknown class id is found.

    Returns:
        A float32 tensor of shape ``(parameters.nbr_of_lines, 5)``; all zeros
        when the image has no entry in the annotations.
    """
    annotation_json = json.loads(annotation.numpy())
    key_py = image_name.numpy().decode("utf-8")
    im_shape = image_shape.numpy()
    class_json = json.loads(classes.numpy())

    # Map the class ids used in the annotation file to list positions.
    translate_classes_id = {
        entry["id"]: index for index, entry in enumerate(class_json)
    }

    lines = np.zeros((parameters.nbr_of_lines, 5))
    present_labels = getPresentLabels(annotation_json, key_py, translate_classes_id)
    head_sides = []
    leg_sides = []

    if key_py in annotation_json:
        # Scale and padding offset depend only on the image shape, so they
        # are computed once rather than per instance.
        # NOTE(review): assumes ``rescaled_size`` is a pure function of the
        # shape — confirm.
        (w, h, scale) = rescaled_size(im_shape)
        offset = (
            (parameters.input_shape[0] - h) / 2,
            (parameters.input_shape[1] - w) / 2,
        )

        for instance in annotation_json[key_py]["instances"]:
            points = instance["points"]
            if instance["classId"] not in translate_classes_id:
                # Report and skip; a debugger breakpoint here would stall
                # the input pipeline.
                tf.print("Invalid class Id: {}".format(instance["classId"]))
                tf.print(annotation_path)
                continue

            label = label_order[translate_classes_id[instance["classId"]]]
            points = reorder_points(points, label, present_labels)
            normalized_points = [
                (points[0] * scale + offset[1]) / parameters.input_shape[1],
                (points[1] * scale + offset[0]) / parameters.input_shape[0],
                (points[2] * scale + offset[1]) / parameters.input_shape[1],
                (points[3] * scale + offset[0]) / parameters.input_shape[0],
            ]

            if label == "head_side":
                head_sides.append(normalized_points)
            elif label == "leg_side":
                leg_sides.append(normalized_points)
            else:
                row = row_order[label]
                lines[row, 0:4] = normalized_points
                lines[row, 4] = 1

    # Side lines can occur several times per image; they occupy consecutive
    # rows starting at their base row.
    for k, head_side in enumerate(head_sides):
        lines[k + row_order["head_side"], 0:4] = head_side
        lines[k + row_order["head_side"], 4] = 1
    for k, leg_side in enumerate(leg_sides):
        lines[k + row_order["leg_side"], 0:4] = leg_side
        lines[k + row_order["leg_side"], 4] = 1

    return tf.convert_to_tensor(lines, tf.float32)
def resize_image(annotation: Annotation):
    """Resize and pad the image to ``parameters.input_shape``.

    The image is passed to ``tf.image.resize_with_pad`` with bilinear
    interpolation; the lines are carried over unchanged.

    Args:
        annotation: The ``Annotation`` whose ``image`` is resized.

    Returns:
        A new ``Annotation`` holding the resized image and the original lines.
    """
    target_height = parameters.input_shape[0]
    target_width = parameters.input_shape[1]
    padded = tf.image.resize_with_pad(
        annotation.image,
        target_height,
        target_width,
        method=ResizeMethod.BILINEAR,
    )
    return Annotation(padded, annotation.lines)
def is_in_split(image_path: tf.string, is_training: bool) -> bool:
    """Decide whether an image belongs to the requested split.

    The path is hashed into one of 10 buckets. Buckets 0-7 form the training
    split and buckets 8-9 the other split.

    Args:
        image_path: String tensor with the image path.
        is_training: ``True`` to test for the training split, ``False`` for
            the other split.

    Returns:
        The result of comparing the bucket index against 8.
    """
    bucket = tf.strings.to_hash_bucket_fast(image_path, 10)
    return bucket < 8 if is_training else bucket >= 8
def rescale_points(tf_pts) -> tf.Tensor:
    """Scale line coordinates by ``parameters.input_shape``.

    Columns 0 and 2 are multiplied by ``parameters.input_shape[1]`` and
    columns 1 and 3 by ``parameters.input_shape[0]``. Rows whose column 4
    equals 0 get their four coordinates set to -100.

    Args:
        tf_pts: Eager tensor with at least five columns per row.

    Returns:
        A float32 tensor with the rescaled rows.
    """
    scaled = tf_pts.numpy()
    first_dim = parameters.input_shape[0]
    second_dim = parameters.input_shape[1]

    # Columns 0..3 are scaled alternately by the second and first dimension.
    for column, factor in enumerate((second_dim, first_dim, second_dim, first_dim)):
        scaled[:, column] *= factor

    # Rows flagged as absent get a sentinel coordinate value.
    scaled[scaled[:, 4] == 0, 0:4] = -100
    return tf.constant(scaled, dtype=tf.float32)
def rescale_to_image_size(annotation: Annotation) -> tuple:
    """Rescale the annotation's lines and return an ``(image, lines)`` tuple.

    ``rescale_points`` is run on ``annotation.lines`` through
    ``tf.py_function``. The result is a plain 2-tuple, not an ``Annotation``.

    Args:
        annotation: The ``Annotation`` whose ``lines`` are rescaled.

    Returns:
        ``(annotation.image, rescaled_lines)`` where ``rescaled_lines`` is the
        float32 output of ``rescale_points``.
    """
    rescaled_points = tf.py_function(
        rescale_points, inp=[annotation.lines], Tout=tf.float32
    )
    return (annotation.image, rescaled_points)