I'm a bit new to tensorflow and I'm trying to create an input pipeline based on tfrecord file. Each entry in the file contains three field: 2 strings with paths to 2 image file and 1 float tensor (the labels for the example). I'm able to write and read back again the information, unfortunately I have some problem keeping image and labels synchronized.
To save the records I'm using this piece of code
writer = tf.python_io.TFRecordWriter(output_tfrecord)
...
for index in shuffled_indexes:
example = tf.train.Example(
features=tf.train.Features(
feature={
'label': tf.train.Feature(float_list=tf.train.FloatList(value=target.ravel().tolist()),
'image_1': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_1.encode()])),
'image_2': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_2.encode()]))
}
)
)
writer.write(example.SerializeToString())
writer.close()
And to read it back again this one (for this example I'm ignoring field 'image_2' in each record):
def read_and_decode(filename, target_shape):
# first construct a queue containing a list of filenames.
# this lets a user split up there dataset in multiple files to keep
# size down
filename_queue = tf.train.string_input_producer(filename,num_epochs=None)
#symbolic reader to read one example at a time
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'label': tf.FixedLenFeature(target_shape, tf.float32),
'image_1': tf.FixedLenFeature([], tf.string),
'image_2': tf.FixedLenFeature([], tf.string)
}
)
img_filename_queue = tf.train.string_input_producer([features['image_1']],shuffle=False)
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(img_filename_queue)
image = tf.image.decode_jpeg(image_file, channels=3)
with tf.control_dependencies([image]):
label = features['label']
return image,label
Each couple image and label are an example from my training set. If I try to run them in a single session what I get are not synchronized result, e.g. in a toy example with just two records in the tfrecord file the image and label are exchanged: first label with second image and vice versa.
Example of my session code:
image,label = read_and_decode([outputfileName],result_shape)
with tf.Session() as sess:
# Start the queue runners (input threads)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(2):
img,trg = sess.run([image,label])
ioUtils.visualizeLabel(img,trg)
# When done, ask the threads to stop.
coord.request_stop()
# Wait for threads to finish.
coord.join(threads)
Any advice on what I'm doing wrong?