-1

I would like to keep all raw rows per key in a "SELECT .. FROM .. GROUP BY .." query in Flink. I defined an AggregateFunction called RowToJsonAgg, which aggregates rows into a JSON string.

class RowToJsonAgg extends AggregateFunction[String, ListBuffer[String]]{
  def accumulate(accumulator: ListBuffer[String], row: Any*): Unit = {
   ....

    // assume the row looks like "$field1_name, $field1_value, $field2_name, $field2_value, ..."
    // try to generate json from row. However, it seems Flink can't find this function when I run the query
  }

  /**
   * Folds every partial accumulator in `its` into `accumulator`.
   *
   * The incoming buffers are first flattened into a single list, and the
   * resulting elements are then appended to `accumulator` in iteration order.
   */
  def merge(accumulator: ListBuffer[String], its: java.lang.Iterable[ListBuffer[String]]): Unit = {
    val partials = WrapAsScala.iterableAsScalaIterable(its)
    // Materialise all pending elements before mutating the target buffer.
    val pending = partials.toList.flatten
    accumulator ++= pending
  }

  /** Removes every element from `accumulator` in place. */
  def resetAccumulator(accumulator: ListBuffer[String]): Unit =
    accumulator.clear()

  /**
   * Renders the accumulated entries as one string: the entries joined by ","
   * and wrapped in "{" and "}".
   */
  override def getValue(accumulator: ListBuffer[String]): String = {
    val joined = accumulator.mkString(",")
    "{" + joined + "}"
  }

  /** Returns a new, empty buffer to be used as the accumulator. */
  override def createAccumulator(): ListBuffer[String] = {
    new ListBuffer[String]
  }

  /** Type information for the accumulator, derived from the `ListBuffer` class. */
  override def getAccumulatorType(): TypeInformation[ListBuffer[String]] =
    TypeInformation.of(classOf[ListBuffer[String]])

  /** Type information for the aggregate's result, derived from the `String` class. */
  override def getResultType: TypeInformation[String] = {
    TypeInformation.of(classOf[String])
  }
}

The data class and the query look like the following:

/** One stock record with the fields `id`, `price`, `volumn` and `ts`. */
case class Stock(id:Int, price: Int, volumn: Int, ts: Long)

// Environment settings: Blink planner, batch mode.
val bbSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build()

val bbTableEnv = TableEnvironment.create(bbSettings)

// Register RowToJsonAgg under the SQL function name "row_to_json_agg".
bbTableEnv.createTemporarySystemFunction("row_to_json_agg", classOf[RowToJsonAgg])

val table = bbTableEnv.fromValues(...)

// Expose the table to SQL under the view name "Stock".
bbTableEnv.createTemporaryView("Stock", table)

// Group by price and pass (field name, field value) pairs for volumn and ts to the aggregate.
bbTableEnv.executeSql(
    "select price, row_to_json_agg('volumn', volumn, 'ts', ts) as details from Stock group by price"
)

When I ran the application, I got a SQL validation exception; the detailed message is "No match found for function signature row_to_json_agg(CHARACTER, NUMERIC, CHARACTER, NUMERIC)"

It seems that Flink can't find the right accumulate function to call.

If I declare the accumulate function as follows

def accumulate(accumulator: ListBuffer[String], volumn: Integer, ts: Long)

and change the query to

"select price, row_to_json_agg(volumn, ts) from Stock group by price" 

I get the same exception, and the message is "No match found for function signature row_to_json_agg( NUMERIC, NUMERIC)"

Any ideas how to make the aggregate function work?

Grant
  • 500
  • 1
  • 5
  • 18

1 Answer

0

I figured it out by myself.

  1. Register the UDF by running SQL like the following:

    // The method is executeSql (lower-case "ql"), the same call the question uses to run its query.
    // $udf_name and $full_class_name_of_your_udf are placeholders to substitute with real values.
    bbTableEnv.executeSql( String.format("create temporary function $udf_name as '%s'", "$full_class_name_of_your_udf") )

instead of

bbTableEnv.createTemporarySystemFunction("row_to_json_agg", classOf[RowToJsonAgg])
  2. Prefer to implement the UDF in Java instead of Scala.
Grant
  • 500
  • 1
  • 5
  • 18