I have a problem: my window aggregation accumulates all the results but does not return them, so my result stream is empty. I suspect it has something to do with window triggering, but I cannot figure out what exactly.
# Switch the execution environment to event-time semantics.
env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
class MyTimestampAssigner(TimestampAssigner):
    """Assign each record an event timestamp derived from its first field."""

    def extract_timestamp(self, element, previous_element_timestamp):
        """Return the event time of ``element`` as integer epoch milliseconds.

        Args:
            element: Record whose first field supports ``strftime``.
            previous_element_timestamp: Unused.

        Returns:
            int: Start of the record's day in milliseconds since the epoch.
        """
        # Round-trip through a date-only string to drop any time-of-day part.
        day_str = element[0].strftime('%Y-%m-%d')
        start_of_day = datetime.strptime(day_str, '%Y-%m-%d')
        # Flink expects integer epoch *milliseconds*; datetime.timestamp()
        # yields float *seconds*. Returning seconds puts every event on the
        # wrong time scale, so the day-sized windows never fire.
        return int(start_of_day.timestamp() * 1000)
# Tolerate events arriving up to one day out of order, and take event
# timestamps from MyTimestampAssigner.
watermark_strategy = (
    WatermarkStrategy
    .for_bounded_out_of_orderness(Duration.of_days(1))
    .with_timestamp_assigner(MyTimestampAssigner())
)
joined_data_stream = joined_data_stream.assign_timestamps_and_watermarks(
    watermark_strategy
)

# Partition the stream by the second field of each record.
keyed_stream = joined_data_stream.key_by(lambda x: x[1])
class AverageAggregate(functions.AggregateFunction):
    """Incrementally compute the mean of the fourth field of each record.

    The accumulator is a ``(running_sum, count)`` pair.
    """

    def create_accumulator(self) -> [int, int]:
        """Return the empty accumulator: zero sum, zero count."""
        return 0, 0

    def add(self, value, accumulator):
        """Fold one record into the accumulator and return the new pair."""
        print('ADDING')
        running_sum = accumulator[0] + value[3]
        count = accumulator[1] + 1
        return running_sum, count

    def merge(self, a: [int, int], b: [int, int]) -> [int, int]:
        """Combine two partial accumulators component-wise."""
        print('MERGING')
        merged_sum = a[0] + b[0]
        merged_count = a[1] + b[1]
        return merged_sum, merged_count

    def get_result(self, accumulator):
        """Return the mean (sum divided by count) held in the accumulator."""
        average = accumulator[0] / accumulator[1]
        print('GETTING', average)
        return average
# Per-key average over two-day event-time windows that slide by one day.
sliding_windows = SlidingEventTimeWindows.of(Time.days(2), Time.days(1))
avg_stream = (
    joined_data_stream
    .key_by(lambda x: x[1])
    .window(sliding_windows)
    .aggregate(
        AverageAggregate(),
        accumulator_type=Types.TUPLE([Types.LONG(), Types.LONG()]),
        output_type=Types.DOUBLE(),
    )
)
Here is all the code I have so far; I would be grateful for any tips.
UPDATE: The problem was in my timestamp assignment. Replacing this
return datetime.strptime(date_str, '%Y-%m-%d').timestamp()
With this
return int(datetime.strptime(date_str, '%Y-%m-%d').timestamp()*1000)
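fixed everything: Flink expects event timestamps as integer milliseconds since the epoch, whereas `datetime.timestamp()` returns seconds as a float.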