Multi Layer Tiff labelled dataset conversion to format that tensor flow can use for model optimisation

Question

I'm new to both Python and TensorFlow, and was wondering...

What is the best way to convert a labelled dataset of multi-layer TIFFs into a format that TensorFlow can use for model optimisation / fine-tuning?

I currently have the code below, which puts each layer of every multi-layer TIFF in a folder into a 3D array, but I need to preserve the label or filename of each TIFF. I have seen some TensorFlow scripts that convert to TFRecords; however, I'm not sure whether these preserve the filename. How would you best go about this? It will be quite a big dataset.

Any help much appreciated

import os # For file handling 
from PIL import Image# Import Pillow image processing library 
import numpy 
CroppedMultiTiffs = "MultiTiffs/"

# Load every multi-layer TIFF in the folder into its own 3D array and report
# it together with its filename, so the label/filename stays attached to the
# pixel data. Files are handled one at a time to keep memory use flat on a
# large dataset.
for filename in sorted(os.listdir(CroppedMultiTiffs)):
    if not filename.lower().endswith((".tif", ".tiff")):
        continue  # ignore anything in the folder that is not a TIFF

    # The context manager closes the file handle before the next file opens.
    with Image.open(os.path.join(CroppedMultiTiffs, filename)) as img:
        # img.size is (width, height); the array is (frames, height, width).
        imgArray = numpy.zeros((img.n_frames, img.size[1], img.size[0]), numpy.uint8)
        try:
            # Start at frame 0 so that no page is left as zeros.
            for frame in range(img.n_frames):
                img.seek(frame)
                imgArray[frame, :, :] = numpy.asarray(img)
        except EOFError:
            # The file ended before all announced frames could be read.
            print("skipping truncated file:", filename)
            continue

    print(filename, imgArray.shape)  # imgArray is 3D: (frames, height, width)
    print(filename, imgArray.size)

best wishes

TWP

OneWorld · Answer 1 · 2017-07-18T13:09:16.607

Okay, so I figured it out using the guide on Daniil's blog: http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/

However, my current implementation creates multiple TFRecords, and I think it needs to be a single TFRecord, so I am trying to figure out how to write everything into one TFRecord. How do I do that?

Then I can validate it with a TFRecord reading script that reads it back and checks that it is in the right format for TensorFlow. I currently get errors when using the reading script.

from PIL import Image
import numpy as np
import tensorflow as tf
import os

def _bytes_feature(value):
    """Wrap one bytes value in a tf.train.Feature holding a BytesList."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    """Wrap one integer value in a tf.train.Feature holding an Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

path = 'test/'
output = 'output/'
# Set to a file name such as 'dataset.tfrecord' to collect every example in
# ONE TFRecord inside `output`. None keeps one '<tiff name>.tfrecord' per
# input file.
single_record_name = None


def _load_stack(tiff_path):
    """Read a multi-page TIFF.

    Returns a tuple (stack, first_page): `stack` is a uint8 array shaped
    (height, width, pages) holding every page, `first_page` is page 0 as a
    2D array (stored as the mask below).
    Raises EOFError if the file ends before all announced pages are read.
    """
    # The context manager closes the file handle once the pixels are copied.
    with Image.open(tiff_path) as img:
        first_page = np.array(img)
        # img.size is (width, height).
        stack = np.zeros((img.size[1], img.size[0], img.n_frames), np.uint8)
        for frame in range(img.n_frames):
            img.seek(frame)
            stack[:, :, frame] = np.asarray(img)
    return stack, first_page


fileList = [os.path.join(dirpath, f) for dirpath, dirnames, files in os.walk(path) for f in files if f.endswith('.tif')]

print (fileList)

if not os.path.exists(output):
    os.mkdir(output)

shared_writer = None
if single_record_name:
    shared_writer = tf.python_io.TFRecordWriter(os.path.join(output, single_record_name))

try:
    for filename in fileList:
        basename = os.path.basename(filename)
        file_name = os.path.splitext(basename)[0]
        print ("processing file: " , filename)
        print (file_name)

        try:
            imgArray, annotation = _load_stack(filename)
        except EOFError:
            print ("skipping truncated file: " , filename)
            continue

        # The keys match the array axes, so a reader can reshape 'image_raw'
        # to [height, width, depth].
        height, width, depth = imgArray.shape

        print ("print img size:" , (width, height))
        print ("print image shape: " , imgArray.shape)
        print ("print image size: " , imgArray.size)

        example = tf.train.Example(features=tf.train.Features(feature={
            'height': _int64_feature(height),
            'width': _int64_feature(width),
            'depth': _int64_feature(depth), # number of pages
            # Keeps the source file name attached to the pixel data.
            'filename': _bytes_feature(basename.encode('utf-8')),
            'image_raw': _bytes_feature(imgArray.tobytes()),
            'mask_raw': _bytes_feature(annotation.tobytes())}))

        if shared_writer is not None:
            writer = shared_writer
        else:
            writer = tf.python_io.TFRecordWriter(output + file_name + '.tfrecord')
        try:
            writer.write(example.SerializeToString())
        finally:
            # A per-file writer must be closed here so its record is flushed.
            if writer is not shared_writer:
                writer.close()
finally:
    if shared_writer is not None:
        shared_writer.close()

My current TFRecords Reading script

import tensorflow as tf
import os

def read_and_decode(filename_queue):
  """Read one serialized Example from the queue and decode its features.

  Args:
    filename_queue: queue of TFRecord file names, e.g. the result of
      tf.train.string_input_producer.

  Returns:
    (image, label, height, width, depth): `image` is the flat uint8 tensor
    decoded from 'image_raw'; the other four are int32 scalars. `label` is
    -1 when the record carries no 'label' feature.
  """
  reader = tf.TFRecordReader()
  _, serialized_example = reader.read(filename_queue)
  features = tf.parse_single_example(
      serialized_example,
      features={
          'image_raw': tf.FixedLenFeature([], tf.string),
          # A default makes 'label' optional: records without it parse as -1
          # instead of failing with "Feature: label ... is required but
          # could not be found".
          'label': tf.FixedLenFeature([], tf.int64, default_value=-1),
          'height': tf.FixedLenFeature([], tf.int64),
          'width': tf.FixedLenFeature([], tf.int64),
          'depth': tf.FixedLenFeature([], tf.int64)
      })
  image = tf.decode_raw(features['image_raw'], tf.uint8)
  label = tf.cast(features['label'], tf.int32)
  height = tf.cast(features['height'], tf.int32)
  width = tf.cast(features['width'], tf.int32)
  depth = tf.cast(features['depth'], tf.int32)
  return image, label, height, width, depth

# Read the TFRecord back once and print every decoded example with its label.
with tf.Session() as sess:
  # num_epochs=1 makes the queue stop after one pass instead of replaying the
  # same record(s) forever.
  filename_queue = tf.train.string_input_producer(
      ["output/A.3.1.tfrecord"], num_epochs=1, shuffle=False)
  image, label, height, width, depth = read_and_decode(filename_queue)
  # The record stores the array's three axis lengths under 'height', 'width'
  # and 'depth' in axis order, so reshaping in that order restores the array
  # for any image size or page count.
  image = tf.reshape(image, tf.stack([height, width, depth]))
  # num_epochs is tracked in a local variable, so local variables must be
  # initialised as well.
  init_op = tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer())
  sess.run(init_op)
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)
  try:
    while not coord.should_stop():
      example, l = sess.run([image, label])
      print (example,l)
  except tf.errors.OutOfRangeError:
    # Raised once every record has been read.
    pass
  finally:
    # Always stop and join the queue threads, even if reading failed.
    coord.request_stop()
    coord.join(threads)

receiving the error:-

InvalidArgumentError (see above for traceback): Name: , Feature: label (data type: int64) is required but could not be found.

Images are grayscale multi-page

Multi Layer Tiff labelled dataset conversion to format that tensor flow can use for model optimisation

1 Answers1