I'd like to incorporate data augmentation into my tf.data pipeline. For applying data augmentation I'm using TensorFlow's preprocessing layers and the Sequential class. When I incorporate a series of data augmentation operations into my tf.data pipeline, my dataset type changes from MapDataset to PrefetchDataset and/or the tensor shape changes from ...
(32, 224, 224, 3)
(32,)
to ...
(224, 224, 3)
()
With that my tensors are in the wrong shape.
Here is my code:
import numpy as np
import time
import PIL.Image as Image
import tensorflow as tf
import tensorflow_hub as hub
import datetime
import os
from tensorflow import keras
from tensorflow.keras import layers
# Data Preprocessing
# Deterministic preprocessing applied to every image: scale pixel values
# by 1/255, then resize to the target spatial size.
# NOTE(review): img_height / img_width must be defined earlier in the file.
_preprocess_layers = [
    layers.Rescaling(1.0 / 255),
    layers.Resizing(img_height, img_width),
]
normalization_layer = tf.keras.Sequential(_preprocess_layers)
# Data Augmentation
# Random transforms intended for training only (the pipeline calls this
# with training=True). NOTE(review): per Keras docs a scalar factor such
# as 0.05 is treated as a symmetric range — confirm this matches intent.
_augment_layers = []
_augment_layers.append(layers.RandomZoom(height_factor=0.05, width_factor=0.05))
_augment_layers.append(layers.RandomRotation(factor=0.1))
_augment_layers.append(layers.RandomContrast(factor=0.1))
data_augmentation = tf.keras.Sequential(_augment_layers)
# incorporate data augmentation into trainingset pipeline
AUTOTUNE = tf.data.AUTOTUNE
# NOTE(review): from_tensor_slices strips the leading (batch) dimension of
# image_batch — each element is a single (H, W, C) image and a scalar label
# until .batch() restores the batch axis. That, plus prefetch() wrapping the
# result in a PrefetchDataset, is expected tf.data behavior, not corruption.
train_ds = tf.data.Dataset.from_tensor_slices((image_batch, labels_batch))
# Pipeline order matters:
#  - shuffle/batch first, so the map lambdas see (batch, H, W, C) tensors;
#  - cache() sits AFTER the deterministic normalization but BEFORE the random
#    augmentation. Caching after augmentation (as the original did) freezes
#    one fixed set of "random" augmentations and replays the identical images
#    every epoch, defeating the augmentation entirely;
#  - prefetch() is last so the host prepares batches while the model trains.
aug_train_ds = (
    train_ds
    .shuffle(batch_size * 100)
    .batch(batch_size)
    .map(lambda x, y: (normalization_layer(x, training=True), y),
         num_parallel_calls=AUTOTUNE)
    .cache()
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)
# incorporate data augmentation into validationset pipeline
# Validation gets only the deterministic normalization — no random
# augmentation. cache() now comes AFTER the normalization map so the
# already-normalized batches are cached; the original cached the raw
# batches and re-ran the normalization map every epoch for no benefit.
aug_val_ds = (
    val_ds
    .batch(batch_size)
    .map(lambda x, y: (normalization_layer(x), y))
    .cache()
    .prefetch(buffer_size=AUTOTUNE)  # Smooth data loader pipeline
)