I have a Docker Compose file that spins up the containers below.

  • Spark-Iceberg
  • Hive Metastore
  • MariaDB
  • Kafka
  • Trino
  • MinIO

My aim is to read streaming data from the Kafka container and write it into an Iceberg table. I am using PySpark for this in a notebook.
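
For reference, the streaming read that produces value_df looks roughly like this (a sketch rather than my exact code; the topic name, bootstrap server, and payload schema below are assumptions):

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import StringType, StructField, StructType

# In the notebook image a session already exists; getOrCreate() reuses it.
spark = SparkSession.builder.getOrCreate()

# Assumed message schema; my real payload differs.
schema = StructType([
    StructField("order_id", StringType()),
    StructField("amount", StringType()),
])

raw_df = (
    spark.readStream.format("kafka")
    .option("kafka.bootstrap.servers", "kafka:9092")  # assumed service name/port
    .option("subscribe", "orders")                    # assumed topic name
    .option("startingOffsets", "earliest")
    .load()
)

# Kafka delivers the value as binary; cast to string and parse the JSON.
value_df = raw_df.select(
    from_json(col("value").cast("string"), schema).alias("data")
).select("data.*")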

After reading from the Kafka topic, I tried to write the dataframe into the Iceberg table:

value_df.writeStream \
    .format("iceberg") \
    .outputMode("append") \
    .option("path", 'catalog_name.default.orders') \
    .option("checkpointLocation", 's3a://catalog_name/checkpointlocation') \
    .start()

The Hive container comes preconfigured with the Iceberg connector and exposes a single catalog called catalog_name.
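
On the Spark side, that catalog would be registered along these lines (a sketch; my actual settings live in the mounted spark-defaults.conf, and the catalog properties shown are the standard Iceberg ones for a Hive-backed catalog):

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    # Register "catalog_name" as an Iceberg catalog backed by the Hive Metastore.
    .config("spark.sql.catalog.catalog_name", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.catalog_name.type", "hive")
    # Matches metastore.thrift.uris in metastore-site.xml below.
    .config("spark.sql.catalog.catalog_name.uri", "thrift://hive-metastore:9083")
    .getOrCreate()
)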

I am getting the following error on this statement.

Py4JJavaError: An error occurred while calling o327.start.
org.apache.iceberg.hive.RuntimeMetaException: Failed to connect to Hive Metastore
    at org.apache.iceberg.hive.HiveClientPool.newClient(HiveClientPool.java:72)
    at org.apache.iceberg.hive.HiveClientPool.newClient(HiveClientPool.java:35)
    at org.apache.iceberg.ClientPoolImpl.get(ClientPoolImpl.java:125)
    at org.apache.iceberg.ClientPoolImpl.run(ClientPoolImpl.java:56)
    at org.apache.iceberg.ClientPoolImpl.run(ClientPoolImpl.java:51)
    at org.apache.iceberg.hive.CachedClientPool.run(CachedClientPool.java:76)
    at org.apache.iceberg.hive.HiveTableOperations.doRefresh(HiveTableOperations.java:193)

hive-site.xml looks like this:

<?xml version="1.0"?>
<configuration>

    <property>
        <name>iceberg.engine.hive.enabled</name>
        <value>true</value>
    </property>

</configuration>

metastore-site.xml is:

<configuration>
    <property>
        <name>metastore.thrift.uris</name>
        <value>thrift://hive-metastore:9083</value>
        <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
    </property>
    <property>
        <name>metastore.task.threads.always</name>
        <value>org.apache.hadoop.hive.metastore.events.EventCleanerTask,org.apache.hadoop.hive.metastore.MaterializationsCacheCleanerTask</value>
    </property>
    <property>
        <name>metastore.expression.proxy</name>
        <value>org.apache.hadoop.hive.metastore.DefaultPartitionExpressionProxy</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value></value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value></value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value></value>
    </property>

    <property>
        <name>fs.s3a.access.key</name>
        <value></value>
    </property>
    <property>
        <name>fs.s3a.secret.key</name>
        <value></value>
    </property>
    <property>
        <name>fs.s3a.endpoint</name>
        <value>http://minio:9000</value>
    </property>
    <property>
        <name>fs.s3a.path.style.access</name>
        <value>true</value>
    </property>
    <property>
        <name>fs.s3a.connection.ssl.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>hive.metastore.disallow.incompatible.col.type.changes</name>
        <value>false</value>
    </property>
</configuration>

The docker-compose file looks like this:

mariadb:
    image: 'mariadb:latest'
    hostname: mariadb
    ports:
      - '3306:3306'
    environment:
      MYSQL_ROOT_PASSWORD: xxx
      MYSQL_USER: xxx
      MYSQL_PASSWORD: xxx 
      MYSQL_DATABASE: xxx
    volumes:
      - mariadb-data:/var/lib/mysql
    networks:
      - trino-network

hive-metastore:
    hostname: hive-metastore
    image: 'bitsondatadev/hive-metastore:latest'
    ports:
      - '9083:9083' # Metastore Thrift
    volumes:
      - ./conf/metastore-site.xml:/opt/apache-hive-metastore-3.0.0-bin/conf/metastore-site.xml:ro
      - ./libs/iceberg-hive-runtime-0.14.0.jar:/opt/apache-hive-metastore-3.0.0-bin/lib/iceberg-hive-runtime-0.14.0.jar
    environment:
      METASTORE_DB_HOSTNAME: mariadb
    depends_on:
      - mariadb
    networks:
      - trino-network

spark-iceberg:
    image: tabulario/spark-iceberg:3.3.0_0.14.0
    depends_on:
      - hive-metastore
    container_name: spark-iceberg
    environment:
      - AWS_ACCESS_KEY_ID=xxx
      - AWS_SECRET_ACCESS_KEY=xx
      - AWS_S3_ENDPOINT=http://minio:9000
    volumes:
      - ./notebooks:/home/iceberg/notebooks
      - ./spark-apps:/home/iceberg/spark-apps
      - ./conf/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
      - ./conf/core-site.xml:/opt/spark/conf/core-site.xml
      - ./conf/core-default.xml:/opt/spark/conf/core-default.xml
      - ./conf/hive-site.xml:/opt/spark/conf/hive-site.xml
      - ./conf/metastore-site.xml:/opt/spark/conf/metastore-site.xml
      - ./libs/hadoop-aws-3.3.4.jar:/opt/spark/jars/hadoop-aws-3.3.4.jar
      - ./libs/hadoop-common-3.3.4.jar:/opt/spark/jars/hadoop-common-3.3.4.jar
      - ./libs/spark-sql-kafka-0-10_2.12-3.3.0.jar:/opt/spark/jars/spark-sql-kafka-0-10_2.12-3.3.0.jar
      - ./libs/kafka-clients-2.8.1.jar:/opt/spark/jars/kafka-clients-2.8.1.jar
      - ./libs/jsr305-3.0.0.jar:/opt/spark/jars/jsr305-3.0.0.jar
      - ./libs/lz4-java-1.7.1.jar:/opt/spark/jars/lz4-java-1.7.1.jar
      - ./libs/spark-tags_2.12-3.3.0.jar:/opt/spark/jars/spark-tags_2.12-3.3.0.jar
      - ./libs/spark-token-provider-kafka-0-10_2.12-3.3.0.jar:/opt/spark/jars/spark-token-provider-kafka-0-10_2.12-3.3.0.jar
      - ./libs/commons-pool2-2.11.1.jar:/opt/spark/jars/commons-pool2-2.11.1.jar
      - ./libs/commons-logging-1.1.3.jar:/opt/spark/jars/commons-logging-1.1.3.jar
      - ./libs/aws-java-sdk-bundle-1.12.283.jar:/opt/spark/jars/aws-java-sdk-bundle-1.12.283.jar
    ports:
      - 8888:8888
      - 8080:8080
      - 18080:18080
      - 4040:4040      
    command: pyspark-notebook
    networks:
      - trino-network
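
To rule out basic networking problems, the Thrift port can be probed from inside the notebook with something like this (a minimal sketch; it only proves that a TCP connection to the metastore port succeeds):

import socket

# Raises an OSError if hive-metastore:9083 is unreachable from this container.
with socket.create_connection(("hive-metastore", 9083), timeout=5):
    print("hive-metastore:9083 is reachable")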

I am not able to find any issues with the port or network mapping. Any help here would be much appreciated.
