1

I’ve been doing this tutorial: https://databricks.com/notebooks/geomesa-h3-notebook.html

The polygon function (polygonToH3) works perfectly; however, the multi-polygon function is giving strange results. I was expecting millions of H3 indexes, but it generated only a few hundred indexes for an area of at least 10,000 km².

/**
 * Spark UDF that covers a MultiPolygon with H3 cells at the given resolution.
 *
 * Every member polygon of the MultiPolygon is filled independently, using its
 * own exterior ring as the outline and its own interior rings as holes. The
 * per-polygon results are then merged into a single list.
 *
 * (The previous implementation passed member polygon 0 as the outline and all
 * other member polygons as *holes*, so their area was subtracted rather than
 * filled.)
 *
 * @param geometry   geometry to cover; anything that is null or whose
 *                   geometry type is not "MultiPolygon" yields an empty list
 * @param resolution H3 resolution handed to `H3.instance.polyfill`
 * @return the H3 indexes returned by polyfill for all member polygons,
 *         without duplicates
 */
val multiPolygonToH3 = udf{ (geometry: Geometry, resolution: Int) =>
  // Converts one ring into the (lat, lng) coordinate list H3 expects.
  // JTS stores x = longitude, y = latitude, hence the swap.
  def ringToGeoCoords(ring: Geometry): java.util.List[GeoCoord] =
    ring.getCoordinates.toList.map(coord => new GeoCoord(coord.y, coord.x)).asJava

  // Fills a single polygon, honouring its holes.
  def polygonToCells(polygon: Geometry): List[java.lang.Long] = {
    // The boundary of a JTS polygon is its set of rings: a single LinearRing
    // when there are no holes, otherwise a MultiLineString with the exterior
    // ring first followed by the interior rings. An empty polygon has an
    // empty boundary (zero component geometries).
    val boundary = polygon.getBoundary
    val rings = (0 until boundary.getNumGeometries).toList
      .map(n => ringToGeoCoords(boundary.getGeometryN(n)))
    rings match {
      case shell :: holes =>
        H3.instance.polyfill(shell, holes.asJava, resolution).asScala.toList
      case Nil =>
        List.empty[java.lang.Long]
    }
  }

  Option(geometry)
    .filter(_.getGeometryType == "MultiPolygon")
    .toList
    .flatMap { multi =>
      (0 until multi.getNumGeometries).flatMap(n => polygonToCells(multi.getGeometryN(n)))
    }
    // Member polygons of a valid MultiPolygon do not overlap, but invalid
    // input could; duplicates would turn into duplicate rows after explode.
    .distinct
}
This is the code that calls the above function: 

/**
 * Converts the MultiPolygon geometry of one LGA into H3 indexes, prints the
 * resulting schema, the number of generated rows and the elapsed time.
 *
 * Reads rows from `tca_test_dl.lga` for the given LGA id whose `lga_wkt` is
 * not null and whose `GeometryType` is 'MultiPolygon', parses the WKT into a
 * geometry column, applies `multiPolygonToH3` and explodes the resulting
 * index list into one row per H3 index.
 *
 * @param iLGA id of the LGA to process (matched against `lga_id`)
 * @param res  H3 resolution passed to `multiPolygonToH3`; defaults to 12,
 *             the value that used to be hard-coded. Must be within 0..15,
 *             the resolution range H3 defines.
 * @return always `true`; failures surface as exceptions
 * @throws IllegalArgumentException if `res` is outside 0..15
 */
def ConvertMultiPolyH3(iLGA:Int, res: Int = 12) : Boolean = {
  require(res >= 0 && res <= 15, s"H3 resolution must be between 0 and 15, got $res")

  val batchStart = System.currentTimeMillis()
  val sSQL = s"""SELECT * from tca_test_dl.lga 
             WHERE lga_wkt is not null and lga_id = $iLGA and GeometryType = 'MultiPolygon'"""
  val dfLGA = sparkSession.sql(sSQL)

  val wktdfLGA = dfLGA.withColumn("lga_geom", st_geomFromWKT(col("lga_wkt")))
                        .withColumn("lga_id", col("lga_id").cast(LongType))
                        .withColumn("state_id", col("state_id").cast(LongType)).cache

  try {
    val dfLGA_H3 = wktdfLGA
      .withColumn("h3_index", multiPolygonToH3(col("lga_geom"), lit(res)))
      .withColumn("h3_index", explode($"h3_index"))
    dfLGA_H3.printSchema()

    val dfLGA_New = dfLGA_H3.drop("lga_wkt", "lga_geom")
    //dfLGA_New.write.mode("append").format("delta").partitionBy("lga_id").save("/mnt/cont-tca-test/delta_lake/lga_h3")
    println(dfLGA_New.count())

    // Elapsed wall-clock time in minutes, rounded to two decimals.
    val elapsedMinutes = (System.currentTimeMillis() - batchStart) / 1000.0 / 60.0
    val batch_time = BigDecimal(elapsedMinutes).setScale(2, BigDecimal.RoundingMode.HALF_UP).toDouble
    println(s"LGA H3 conversion for LGA: $iLGA has been processed in $batch_time minutes")
  } finally {
    // wktdfLGA is the only DataFrame that was cached; release it even when
    // the job above fails so the cache entry does not linger.
    wktdfLGA.unpersist()
  }

  true
}
Jonah Nio
  • 31
  • 1
  • Have you tried visualizing the resulting H3 cells and the multipolygons together? H3's polyfill method may be avoiding the holes in the multipolygon or something like that. Also, have you tried plugging in coarser resolutions? That may provide some insight into how H3 is covering the geometries. – GeoJim Jun 07 '20 at 01:32
  • Hi, thanks a lot for the reply. It turned out that the polygon was too big for resolution = 12. I reduced the resolution to 10 and it worked. Thanks a lot. – Jonah Nio Jun 14 '20 at 10:19

0 Answers0