I'd like to incorporate data augmentation into my tf.data pipeline. For applying data augmentation I'm using TensorFlow's preprocessing layers and the Sequential class. When I incorporate a series of data augmentation operations into my tf.data pipeline, my dataset type changes from MapDataset to PrefetchDataset and/or the tensor shape changes from ...
(32, 224, 224, 3)
(32,)
to ...
(224, 224, 3)
()
With that my tensors are in the wrong shape.
Here is my code:
import numpy as np
import time
import PIL.Image as Image
import tensorflow as tf
import tensorflow_hub as hub
import datetime
import os
from tensorflow import keras
from tensorflow.keras import layers
# Data Preprocessing
# Deterministic preprocessing applied to every image: scale pixel values
# by 1/255, then resize to the target spatial size.
# NOTE(review): img_height / img_width must be defined earlier in the file.
_preprocess_layers = [
    layers.Rescaling(1.0 / 255),
    layers.Resizing(img_height, img_width),
]
normalization_layer = tf.keras.Sequential(_preprocess_layers)
# Data Augmentation
# Random transforms intended for training only (the pipeline calls this
# with training=True). NOTE(review): per Keras docs a scalar factor such
# as 0.05 is treated as a symmetric range — confirm this matches intent.
_augment_layers = []
_augment_layers.append(layers.RandomZoom(height_factor=0.05, width_factor=0.05))
_augment_layers.append(layers.RandomRotation(factor=0.1))
_augment_layers.append(layers.RandomContrast(factor=0.1))
data_augmentation = tf.keras.Sequential(_augment_layers)
# incorporate data augmentation into trainingset pipeline
AUTOTUNE = tf.data.AUTOTUNE
# NOTE(review): from_tensor_slices strips the leading (batch) dimension of
# image_batch — each element is a single (H, W, C) image and a scalar label
# until .batch() restores the batch axis. That, plus prefetch() wrapping the
# result in a PrefetchDataset, is expected tf.data behavior, not corruption.
train_ds = tf.data.Dataset.from_tensor_slices((image_batch, labels_batch))
# Pipeline order matters:
#  - shuffle/batch first, so the map lambdas see (batch, H, W, C) tensors;
#  - cache() sits AFTER the deterministic normalization but BEFORE the random
#    augmentation. Caching after augmentation (as the original did) freezes
#    one fixed set of "random" augmentations and replays the identical images
#    every epoch, defeating the augmentation entirely;
#  - prefetch() is last so the host prepares batches while the model trains.
aug_train_ds = (
    train_ds
    .shuffle(batch_size * 100)
    .batch(batch_size)
    .map(lambda x, y: (normalization_layer(x, training=True), y),
         num_parallel_calls=AUTOTUNE)
    .cache()
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)
# incorporate data augmentation into validationset pipeline
# Validation gets only the deterministic normalization — no random
# augmentation. cache() now comes AFTER the normalization map so the
# already-normalized batches are cached; the original cached the raw
# batches and re-ran the normalization map every epoch for no benefit.
aug_val_ds = (
    val_ds
    .batch(batch_size)
    .map(lambda x, y: (normalization_layer(x), y))
    .cache()
    .prefetch(buffer_size=AUTOTUNE)  # Smooth data loader pipeline
)