I am having a problem with the StatisticsGen component of TFX. My input data is stored as TFRecords and contains RaggedTensors (the TFRecords are created with SequenceExample). After running ExampleGen, which correctly splits the file into train and val, StatisticsGen creates a 0 KB file and therefore does not work correctly.
Can anyone confirm whether RaggedTensors are supported by TFX? If so, could you help me generate the statistics?
TF version: 2.4.1; Eager mode: True; TFX version: 0.28.0; TFDV version: 0.28.0; TFT version: 0.28.0; TFMA version: 0.28.0; Hub version: 0.9.0; Beam version: 2.28.0
!pip install tfx
import tensorflow as tf
from tfx.components import ImportExampleGen
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.utils.dsl_utils import external_input
from tfx.proto import example_gen_pb2
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
import tfx
import os
import apache_beam as beam
import tensorflow_data_validation as tfdv
import tensorflow_transform as tft
import tensorflow_model_analysis as tfma
import tensorflow_hub as hub
import logging
# Location of the input TFRecord files and the interactive pipeline context.
path_to_test = os.path.join("tf_records")
context = InteractiveContext(pipeline_root='pipeline_root')

# ExampleGen: import the existing TFRecord files into the pipeline.
# NOTE(review): the records are described as SequenceExample, but no payload
# format is passed to ImportExampleGen here -- confirm against the TFX docs
# whether `payload_format` must be set for downstream components to parse them.
example_gen = ImportExampleGen(input_base=path_to_test)
context.run(example_gen, enable_cache=True)

# URI of the first 'examples' output artifact; the 'train' sub-directory
# beneath it is listed to collect the generated record files.
examples_uri = example_gen.outputs['examples'].get()[0].uri
print(examples_uri)
train_uri = os.path.join(examples_uri, 'train')
tfrecord_filenames = [
    os.path.join(train_uri, entry) for entry in os.listdir(train_uri)
]
# StatisticsGen: compute statistics over the examples emitted by ExampleGen,
# then render the resulting 'statistics' output in the notebook.
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)
context.show(statistics_gen.outputs['statistics'])