
As mentioned in this answer, one can avoid tf.io.VarLenFeature altogether by encoding variable-length arrays as bytes first and then parsing them with tf.io.FixedLenFeature. Are there any significant benefits or drawbacks to either approach? Example code below.

import numpy as np
import tensorflow as tf

BBOX_COUNT = 13
bboxes = np.random.randint(0, 10_000, size=(4, BBOX_COUNT), dtype=np.int64)


def using_fixed_len(bboxes):
    feat = tf.train.Feature(bytes_list=tf.train.BytesList(value=[bboxes.tobytes()]))
    ex = tf.train.Example(features=tf.train.Features(feature={"my": feat}))

    desc = {"my": tf.io.FixedLenFeature([], tf.string)}
    out = tf.io.parse_single_example(ex.SerializeToString(), desc)
    byte_string = out["my"].numpy()
    # np.frombuffer defaults to float64, so the dtype must be given explicitly;
    # the result is flat, so it also needs to be reshaped
    reloaded = np.frombuffer(byte_string, dtype=np.int64).reshape(4, -1)

    np.testing.assert_array_equal(bboxes, reloaded)


def using_var_len(bboxes):
    flat_list = bboxes.reshape(-1).tolist()  # flatten the 2-D array into a plain Python list
    feat = tf.train.Feature(int64_list=tf.train.Int64List(value=flat_list))
    ex = tf.train.Example(features=tf.train.Features(feature={"my": feat}))

    desc = {"my": tf.io.VarLenFeature(tf.int64)}
    out = tf.io.parse_single_example(ex.SerializeToString(), desc)
    arr = tf.sparse.to_dense(out["my"]).numpy()
    reloaded = arr.reshape(4, -1)

    np.testing.assert_array_equal(bboxes, reloaded)
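
For context, here is a rough sketch (not part of the snippets above; the file name "boxes.tfrecord" and the fixed row count of 4 are assumptions for illustration) of how each variant might be decoded inside a tf.data pipeline, where eager .numpy() calls are not available. It reuses the imports from the script above.

def parse_bytes_record(serialized):
    desc = {"my": tf.io.FixedLenFeature([], tf.string)}
    out = tf.io.parse_single_example(serialized, desc)
    # decode_raw needs the dtype, and the decoded tensor comes back flat
    flat = tf.io.decode_raw(out["my"], tf.int64)
    return tf.reshape(flat, (4, -1))


def parse_int64_record(serialized):
    desc = {"my": tf.io.VarLenFeature(tf.int64)}
    out = tf.io.parse_single_example(serialized, desc)
    # VarLenFeature yields a SparseTensor; densify before reshaping
    return tf.reshape(tf.sparse.to_dense(out["my"]), (4, -1))


# e.g. ds = tf.data.TFRecordDataset("boxes.tfrecord").map(parse_bytes_record)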

gebbissimo
