I am building a pipeline using TensorFlow Extended (TFX). The dataset has already been converted to TFRecord format for ingestion. I want to do data preprocessing such as scaling and data augmentation (together with the bounding boxes) and prepare the data for training an object detection model. As I understand TFX, these things have to be done in the Transform component. I am struggling with this part. If someone could suggest an example or give some hint on how to move ahead, it would be very helpful.
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tfx
from tfx.components.evaluator.component import Evaluator
from tfx.components.example_gen.import_example_gen.component import ImportExampleGen
from tfx.components.example_validator.component import ExampleValidator
from tfx.components.model_validator.component import ModelValidator
from tfx.components.pusher.component import Pusher
from tfx.components.schema_gen.component import SchemaGen
from tfx.components.statistics_gen.component import StatisticsGen
from tfx.components.trainer.component import Trainer
from tfx.components.transform.component import Transform
from tfx.orchestration.experimental.interactive.interactive_context import (
InteractiveContext,
)
from tfx.proto import evaluator_pb2
from tfx.proto import example_gen_pb2
from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
from tfx.v1 import proto
# Base location of the TFRecord dataset handed to ImportExampleGen.
# NOTE(review): empty here -- presumably a placeholder to be filled in.
_dataset_tfrecord_path = ""
_pipeline_name = "od_pipeline"

# Output split configuration for the example generator: two named splits,
# "train" with 4 hash buckets and "eval" with 1 (per the TFX ExampleGen
# guide this should give roughly a 4:1 train/eval ratio -- confirm).
output_conf = proto.Output(
    split_config=example_gen_pb2.SplitConfig(
        splits=[
            proto.SplitConfig.Split(name="train", hash_buckets=4),
            proto.SplitConfig.Split(name="eval", hash_buckets=1),
        ],
    ),
)
# Ingestion: ImportExampleGen reads from the dataset path and applies the
# split configuration defined in output_conf.
example_gen = ImportExampleGen(
    input_base=_dataset_tfrecord_path,
    output_config=output_conf,
)

# StatisticsGen consumes the "examples" output of the example generator.
statistics_gen = StatisticsGen(
    examples=example_gen.outputs["examples"],
)

# SchemaGen consumes the "statistics" output of StatisticsGen.
infer_schema = SchemaGen(
    statistics=statistics_gen.outputs["statistics"],
)

# ExampleValidator is wired to both the statistics and the inferred schema.
validate_stats = ExampleValidator(
    statistics=statistics_gen.outputs["statistics"],
    schema=infer_schema.outputs["schema"],
)
# Transform component: wired to the ingested examples, the schema produced by
# SchemaGen, and a user-supplied preprocessing module file.
# NOTE(review): TRANSFORM_MODULE is not defined in the visible code; it must
# be bound to the path of the preprocessing module before this statement runs.
# Per the TFX Transform guide that module is expected to define
# `preprocessing_fn(inputs)` -- confirm against the TFX version in use.
transform = Transform(
    examples=example_gen.outputs["examples"],
    # SchemaGen exposes its output under the "schema" key (the same key the
    # ExampleValidator wiring reads); there is no "result" output, so the
    # previous lookup failed with a KeyError.
    schema=infer_schema.outputs["schema"],
    module_file=TRANSFORM_MODULE,
)
I have tried to find a solution online but couldn't find anything useful.