As mentioned in this answer, one can actually work around using VarLenFeature
by encoding variable-length arrays into bytes first and then using
FixedLenFeature. Are there any significant benefits/drawbacks to either
approach? Example code below.
import numpy as np
import tensorflow as tf
# Number of bounding-box values per row in the demo array.
BBOX_COUNT = 13
# Demo input: 4 rows of BBOX_COUNT random int64 values in [0, 10_000).
bboxes = np.random.randint(0, 10_000, size=(4, BBOX_COUNT), dtype=np.int64)
def using_fixed_len(bboxes):
    """Round-trip *bboxes* through a tf.train.Example as one bytes feature.

    The array is serialized to a single byte string with ``tobytes()``,
    stored under a scalar ``FixedLenFeature([], tf.string)``, parsed back,
    and reconstructed with ``np.frombuffer``.

    Args:
        bboxes: a numpy array of any shape and dtype.

    Raises:
        AssertionError: if the reloaded array differs from the input.
    """
    feat = tf.train.Feature(bytes_list=tf.train.BytesList(value=[bboxes.tobytes()]))
    ex = tf.train.Example(features=tf.train.Features(feature={"my": feat}))
    desc = {"my": tf.io.FixedLenFeature([], tf.string)}
    out = tf.io.parse_single_example(ex.SerializeToString(), desc)
    byte_string = out["my"].numpy()
    # frombuffer needs the dtype (it defaults to float64), and the shape is
    # lost in the raw bytes — recover both from the input array itself
    # instead of hard-coding int64 / (4, -1), so any array round-trips.
    reloaded = np.frombuffer(byte_string, dtype=bboxes.dtype).reshape(bboxes.shape)
    np.testing.assert_array_equal(bboxes, reloaded)
def using_var_len(bboxes):
    """Round-trip *bboxes* through a tf.train.Example as an int64 list.

    The array is flattened into a Python list, stored in an ``Int64List``
    feature, parsed back with ``VarLenFeature`` (yielding a SparseTensor),
    densified, and reshaped to the original shape.

    Args:
        bboxes: a numpy integer array of any shape (values must fit int64).

    Raises:
        AssertionError: if the reloaded array differs from the input.
    """
    # reshape(-1).tolist() flattens in one pass; sum(list_of_lists, [])
    # is the quadratic anti-idiom and only works for exactly-2D input.
    flat_list = bboxes.reshape(-1).tolist()
    feat = tf.train.Feature(int64_list=tf.train.Int64List(value=flat_list))
    ex = tf.train.Example(features=tf.train.Features(feature={"my": feat}))
    desc = {"my": tf.io.VarLenFeature(tf.int64)}
    out = tf.io.parse_single_example(ex.SerializeToString(), desc)
    arr = tf.sparse.to_dense(out["my"]).numpy()
    # VarLenFeature parses to a flat vector; restore the input's shape
    # rather than assuming a hard-coded (4, -1) layout.
    reloaded = arr.reshape(bboxes.shape)
    np.testing.assert_array_equal(bboxes, reloaded)