I am trying to create a custom window, and right now I am facing problems with serialization.
I have this as my current serializer
class CustomSerializer(TypeSerializer):
def serialize(self, element, stream) -> None:
bytes_data = self._underlying_coder.encode(''.join(element).encode('UTF-8'))
stream.write(bytes_data)
def deserialize(self, stream) -> Types.LIST([Types.STRING(), Types.INT(), Types.INT(), Types.INT(), Types.INT(), Types.STRING()]):
tmp = stream.read_utf8().rstrip(')').lstrip('(')
tmp_list = tmp.split(",")
for i in range(len(tmp_list[1:-1])):
tmp_list[i] = int(tmp_list[i])
return tmp
And here is my window assigner
class MonthlyWindowAssigner(WindowAssigner):
def assign_windows(self, element, timestamp, ctx):
dt = datetime.fromtimestamp(timestamp // 1000)
month_start = dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
month_end = dt.replace(day=calendar.monthrange(dt.year, dt.month)[1], hour=23, minute=59, second=59, microsecond=999999)
return [TimeWindow(month_start.timestamp() * 1000, month_end.timestamp() * 1000)]
def get_default_trigger(self, ctx):
return None
def is_event_time(self):
return True
def get_window_serializer(self) -> TypeSerializer[TimeWindow]:
return CustomSerializer()
And aggregator. It works with other windows,so it has something to do custom windowing.
class AverageAggregateRealTime(functions.AggregateFunction):
def create_accumulator(self) -> Tuple[int,str,float, int, list]:
return 0,'', 0.0, 0, list(set())
def add(self, value, accumulator: Tuple[int,str,float, int, list]) -> Tuple[int,str,float, int, list]:
x=set(accumulator[4])
return value[1],value[5],accumulator[2] + value[3], accumulator[3] + 1, list(x.union({value[2]}))
def merge(self, a: Tuple[int,str,float, int, list], b: Tuple[int,str,float, int, list]) -> Tuple[int,str,float, int, list]:
return b[0],b[1],a[2] + b[2], a[3] + b[3], list([a[4]].union(b[4]))
def get_result(self, accumulator: Tuple[int,str,float, int, list]) -> Tuple[int,str,float, int, int]:
return accumulator[0],accumulator[1],accumulator[2] / accumulator[3], accumulator[3],len(accumulator[4])
Right now I am facing AttributeError: 'CoderAdapterIml' object has no attribute 'encode', any idea how I should handle it?
I really appreciate any help you can provide.
Also providing traceback, seems like something is wrong with the accumulator, but not sure
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/datastream/process/input_handler.py", line 53, in process_element
yield from _emit_results(timestamp,
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/datastream/process/input_handler.py", line 131, in _emit_results
for result in results:
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/datastream/window/window_operator.py", line 399, in process_element
self.window_state.add(value)
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/state_impl.py", line 273, in add
accumulator = self._get_accumulator()
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/state_impl.py", line 289, in _get_accumulator
for i in self.get_internal_state().read():
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/state_impl.py", line 147, in get_internal_state
self._internal_state = self._remote_state_backend._get_internal_bag_state(
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/state_impl.py", line 1056, in _get_internal_bag_state
encoded_namespace = self._encode_namespace(namespace)
File "/home/test/.local/lib/python3.10/site-packages/pyflink/fn_execution/state_impl.py", line 1121, in _encode_namespace
encoded_namespace = self._namespace_coder_impl.encode(namespace)
AttributeError: 'CoderAdapterIml' object has no attribute 'encode'